// server/utils/recognitionProcessor.js
import TextDirectionClassifier from './textDirectionClassifier.js';
import TextRecognizer from './textRecognizer.js';
import TextRegionCropper from './textRegionCropper.js';

// A recognition result is kept only when its confidence is strictly above this value.
const MIN_RECOGNITION_CONFIDENCE = 0.05;

// Classifier output value that triggers the rotation step, and the angle passed to rotateImage.
const FLIPPED_ANGLE = 180;

/**
 * Runs the per-region stage of the pipeline: crop each text box, classify its
 * direction, rotate it when the classifier reports 180 with enough confidence,
 * recognize the text, and map the box back to original-image coordinates.
 *
 * The actual work is delegated to TextRegionCropper, TextDirectionClassifier
 * and TextRecognizer; this class only sequences the calls and collects results.
 */
class RecognitionProcessor {
  /**
   * Creates the three collaborators. Sessions and config stay null until
   * initialize() is called.
   */
  constructor() {
    this.recSession = null;
    this.clsSession = null;
    this.config = null;
    this.textDirectionClassifier = new TextDirectionClassifier();
    this.textRecognizer = new TextRecognizer();
    this.textRegionCropper = new TextRegionCropper();
  }

  /**
   * Stores the sessions and config, then forwards them to the direction
   * classifier and the recognizer.
   *
   * @param {*} recSession - Session handed to the text recognizer.
   * @param {*} clsSession - Session handed to the direction classifier.
   * @param {object} config - Shared configuration; `clsThresh` is read from it
   *   by recognizeTextWithCls().
   */
  initialize(recSession, clsSession, config) {
    this.recSession = recSession;
    this.clsSession = clsSession;
    this.config = config;

    this.textDirectionClassifier.initialize(clsSession, config);
    this.textRecognizer.initialize(recSession, config);
  }

  /**
   * Asks the recognizer to load its character set.
   *
   * @param {*} keysPath - Passed through unchanged to the recognizer.
   * @returns {Promise<void>} Resolves once the recognizer has finished loading.
   */
  async loadCharacterSet(keysPath) {
    await this.textRecognizer.loadCharacterSet(keysPath);
  }

  /**
   * @returns {*} Whatever the recognizer reports as its character set size.
   */
  getCharacterSetSize() {
    return this.textRecognizer.getCharacterSetSize();
  }

  /**
   * Processes every text box in order and returns the regions whose text was
   * recognized.
   *
   * A failure inside one region is logged and that region is skipped. A failure
   * outside the per-region handling is logged and rethrown.
   *
   * @param {object} processedImage - Must expose `buffer` plus the fields read
   *   by scaleBoxToOriginalImage().
   * @param {Array<object>} textBoxes - Detected boxes; each needs a numeric
   *   `confidence` and the coordinates read by scaleBoxToOriginalImage().
   * @returns {Promise<Array<object>>} Entries of the form
   *   `{ text, confidence, box, clsResult, clsConfidence, regionIndex }`, where
   *   `confidence` is the recognition confidence multiplied by the classifier
   *   confidence and `regionIndex` is 1-based.
   */
  async recognizeTextWithCls(processedImage, textBoxes) {
    const results = [];

    try {
      console.log(`🔄 开始处理 ${textBoxes.length} 个文本区域`);

      for (let index = 0; index < textBoxes.length; index += 1) {
        const box = textBoxes[index];
        const regionNo = index + 1; // logs and results use 1-based numbering

        try {
          console.log(
            `\n📦 处理区域 ${regionNo}/${textBoxes.length}, 置信度: ${box.confidence.toFixed(4)}`
          );

          const textRegion = await this.textRegionCropper.cropTextRegion(
            processedImage.buffer,
            box,
            regionNo
          );
          if (!textRegion) {
            console.log(`⏭️ 区域 ${regionNo}: 跳过无效区域`);
            continue;
          }

          const { clsResult, clsConfidence } =
            await this.textDirectionClassifier.classifyTextDirection(textRegion.buffer);

          // Rotate only when the classifier says 180 AND is confident enough;
          // the threshold is read only in the 180 case.
          const shouldRotate =
            clsResult === FLIPPED_ANGLE && clsConfidence > this.config.clsThresh;

          let recognitionImage;
          if (shouldRotate) {
            console.log(`🔄 区域 ${regionNo}: 旋转 180°`);
            recognitionImage = await this.textRegionCropper.rotateImage(
              textRegion.buffer,
              FLIPPED_ANGLE
            );
          } else {
            recognitionImage = textRegion.buffer;
          }

          const textResult = await this.textRecognizer.recognizeText(recognitionImage, regionNo);

          const isAccepted =
            textResult.text &&
            textResult.text.trim().length > 0 &&
            textResult.confidence > MIN_RECOGNITION_CONFIDENCE;

          if (!isAccepted) {
            console.log(`❌ 区域 ${regionNo}: 识别失败或置信度过低`);
            continue;
          }

          const originalBox = this.scaleBoxToOriginalImage(box, processedImage);
          results.push({
            text: textResult.text.trim(),
            confidence: textResult.confidence * clsConfidence,
            box: originalBox,
            clsResult,
            clsConfidence,
            regionIndex: regionNo
          });
          // The log shows the untrimmed text; the stored result is trimmed.
          console.log(`✅ 区域 ${regionNo}: 识别成功 "${textResult.text}"`);
        } catch (error) {
          // Best effort: one bad region must not abort the remaining ones.
          console.error(`💥 区域 ${regionNo}: 处理失败`, error.message);
        }
      }

      console.log(`\n🎯 识别完成: ${results.length}/${textBoxes.length} 个区域成功`);
      return results;
    } catch (error) {
      console.error('❌ 整体识别失败:', error);
      throw error;
    }
  }

  /**
   * Maps a box from processed-image coordinates to original-image coordinates.
   *
   * Only `(x1, y1)` and `(x3, y3)` of the input are read. Each coordinate is
   * multiplied by the scale, has the padding subtracted, and is clamped to
   * `[0, originalWidth - 1]` or `[0, originalHeight - 1]`. The four output
   * corners are the axis-aligned rectangle spanned by those two points, so any
   * `x2/y2/x4/y4` on the input box is ignored.
   * NOTE(review): presumably (x1, y1) and (x3, y3) are opposite corners of the
   * detected quad; confirm against the detector output.
   *
   * @param {object} box - Reads `x1`, `y1`, `x3`, `y3` and `confidence`.
   * @param {object} processedImage - Reads `scaleX`, `scaleY`, `paddingX`,
   *   `paddingY`, `originalWidth` and `originalHeight`.
   * @returns {object} `{ x1, y1, x2, y2, x3, y3, x4, y4, confidence }`.
   */
  scaleBoxToOriginalImage(box, processedImage) {
    const { scaleX, scaleY, paddingX, paddingY, originalWidth, originalHeight } = processedImage;

    const maxX = originalWidth - 1;
    const maxY = originalHeight - 1;

    // Scale first, then remove the padding offset, then clamp into the image.
    const mapX = (x) => Math.max(0, Math.min(maxX, x * scaleX - paddingX));
    const mapY = (y) => Math.max(0, Math.min(maxY, y * scaleY - paddingY));

    const startX = mapX(box.x1);
    const startY = mapY(box.y1);
    const endX = mapX(box.x3);
    const endY = mapY(box.y3);

    return {
      x1: startX,
      y1: startY,
      x2: endX,
      y2: startY,
      x3: endX,
      y3: endY,
      x4: startX,
      y4: endY,
      confidence: box.confidence
    };
  }
}

export default RecognitionProcessor;