131 行
4.8 KiB
JavaScript
131 行
4.8 KiB
JavaScript
|
|
// server/utils/recognitionProcessor.js
|
||
|
|
import TextDirectionClassifier from './textDirectionClassifier.js';
|
||
|
|
import TextRecognizer from './textRecognizer.js';
|
||
|
|
import TextRegionCropper from './textRegionCropper.js';
|
||
|
|
|
||
|
|
/**
 * Coordinates the per-region recognition stage: crops each detected text
 * box, runs the direction classifier, optionally rotates the crop by 180°,
 * runs the text recognizer and maps the box back to original-image
 * coordinates.
 *
 * The heavy lifting is delegated to TextRegionCropper,
 * TextDirectionClassifier and TextRecognizer; this class only wires them
 * together.
 */
class RecognitionProcessor {
  /**
   * Creates the collaborators. Sessions and config stay `null` until
   * {@link RecognitionProcessor#initialize} is called.
   */
  constructor() {
    this.recSession = null;
    this.clsSession = null;
    this.config = null;

    this.textDirectionClassifier = new TextDirectionClassifier();
    this.textRecognizer = new TextRecognizer();
    this.textRegionCropper = new TextRegionCropper();
  }

  /**
   * Stores the sessions/config and forwards them to the classifier and
   * the recognizer.
   *
   * @param {*} recSession - Recognition session, passed to the recognizer.
   * @param {*} clsSession - Classification session, passed to the classifier.
   * @param {object} config - Shared configuration; `config.clsThresh` is read
   *   later by {@link RecognitionProcessor#recognizeTextWithCls}.
   */
  initialize(recSession, clsSession, config) {
    this.recSession = recSession;
    this.clsSession = clsSession;
    this.config = config;

    this.textDirectionClassifier.initialize(clsSession, config);
    this.textRecognizer.initialize(recSession, config);
  }

  /**
   * Delegates character-set loading to the recognizer.
   *
   * @param {string} keysPath - Forwarded unchanged to the recognizer.
   * @returns {Promise<void>}
   */
  async loadCharacterSet(keysPath) {
    await this.textRecognizer.loadCharacterSet(keysPath);
  }

  /**
   * @returns {*} Whatever the recognizer reports as its character-set size.
   */
  getCharacterSetSize() {
    return this.textRecognizer.getCharacterSetSize();
  }

  /**
   * Recognizes the text inside every detected box, one region at a time.
   *
   * A failure in a single region is logged and that region is skipped; it
   * never aborts the remaining regions. Errors raised outside the per-region
   * handling (e.g. `textBoxes` not having a `length`) are logged and rethrown.
   *
   * @param {object} processedImage - Must expose `buffer` plus the fields
   *   consumed by {@link RecognitionProcessor#scaleBoxToOriginalImage}.
   * @param {Array<object>} textBoxes - Detected boxes (`x1`, `y1`, `x3`, `y3`,
   *   optionally `confidence`).
   * @returns {Promise<Array<object>>} One entry per accepted region:
   *   `{ text, confidence, box, clsResult, clsConfidence, regionIndex }`,
   *   where `confidence` is recognizer confidence multiplied by classifier
   *   confidence and `regionIndex` is 1-based.
   * @throws Rethrows any error raised outside the per-region handling.
   */
  async recognizeTextWithCls(processedImage, textBoxes) {
    // Recognitions at or below this confidence are discarded.
    const minRecognitionConfidence = 0.05;
    const results = [];

    try {
      console.log(`🔄 开始处理 ${textBoxes.length} 个文本区域`);

      for (let i = 0; i < textBoxes.length; i++) {
        const box = textBoxes[i];
        const regionIndex = i + 1;

        try {
          // Formatting must not be able to fail: a box without a numeric
          // confidence would otherwise throw here and the whole region would
          // be dropped because of a log statement.
          const confidenceLabel =
            typeof box.confidence === 'number' ? box.confidence.toFixed(4) : 'N/A';
          console.log(`\n📦 处理区域 ${regionIndex}/${textBoxes.length}, 置信度: ${confidenceLabel}`);

          const textRegion = await this.textRegionCropper.cropTextRegion(
            processedImage.buffer, box, regionIndex
          );
          if (!textRegion) {
            console.log(`⏭️ 区域 ${regionIndex}: 跳过无效区域`);
            continue;
          }

          const { clsResult, clsConfidence } =
            await this.textDirectionClassifier.classifyTextDirection(textRegion.buffer);

          // Only flip the crop when the classifier is confident enough.
          let recognitionImage = textRegion.buffer;
          if (clsResult === 180 && clsConfidence > this.config.clsThresh) {
            console.log(`🔄 区域 ${regionIndex}: 旋转 180°`);
            recognitionImage = await this.textRegionCropper.rotateImage(textRegion.buffer, 180);
          }

          const textResult = await this.textRecognizer.recognizeText(recognitionImage);
          const trimmedText = textResult.text ? textResult.text.trim() : '';

          if (trimmedText.length > 0 && textResult.confidence > minRecognitionConfidence) {
            results.push({
              text: trimmedText,
              confidence: textResult.confidence * clsConfidence,
              box: this.scaleBoxToOriginalImage(box, processedImage),
              clsResult,
              clsConfidence,
              regionIndex,
            });

            console.log(`✅ 区域 ${regionIndex}: 识别成功 "${textResult.text}"`);
          } else {
            console.log(`❌ 区域 ${regionIndex}: 识别失败或置信度过低`);
          }
        } catch (error) {
          // Best effort: report the failing region and move on to the next.
          console.error(`💥 区域 ${regionIndex}: 处理失败`, error.message);
        }
      }

      console.log(`\n🎯 识别完成: ${results.length}/${textBoxes.length} 个区域成功`);
      return results;
    } catch (error) {
      console.error('❌ 整体识别失败:', error);
      throw error;
    }
  }

  /**
   * Maps a detected box to original-image coordinates.
   *
   * Corner 1 (`x1`,`y1`) and corner 3 (`x3`,`y3`) are multiplied by
   * `scaleX`/`scaleY`, shifted by `-paddingX`/`-paddingY`, and clamped to
   * `[0, originalWidth - 1]` / `[0, originalHeight - 1]`. The result is the
   * axis-aligned rectangle spanned by those two corners; the input's
   * `x2/y2/x4/y4` are not read.
   *
   * @param {object} box - Needs `x1`, `y1`, `x3`, `y3`; `confidence` is
   *   copied through as-is.
   * @param {object} processedImage - Needs `scaleX`, `scaleY`, `paddingX`,
   *   `paddingY`, `originalWidth`, `originalHeight`.
   * @returns {{x1:number,y1:number,x2:number,y2:number,x3:number,y3:number,x4:number,y4:number,confidence:*}}
   */
  scaleBoxToOriginalImage(box, processedImage) {
    const {
      scaleX, scaleY,
      paddingX, paddingY,
      originalWidth, originalHeight
    } = processedImage;

    const clamp = (value, max) => Math.max(0, Math.min(max, value));
    const maxX = originalWidth - 1;
    const maxY = originalHeight - 1;

    const left = clamp(box.x1 * scaleX - paddingX, maxX);
    const top = clamp(box.y1 * scaleY - paddingY, maxY);
    const right = clamp(box.x3 * scaleX - paddingX, maxX);
    const bottom = clamp(box.y3 * scaleY - paddingY, maxY);

    // Corners in order: top-left, top-right, bottom-right, bottom-left.
    return {
      x1: left,
      y1: top,
      x2: right,
      y2: top,
      x3: right,
      y3: bottom,
      x4: left,
      y4: bottom,
      confidence: box.confidence
    };
  }
}
|
||
|
|
|
||
|
|
export default RecognitionProcessor;
|