| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- // server/utils/recognitionProcessor.js
- import TextDirectionClassifier from './textDirectionClassifier.js';
- import TextRecognizer from './textRecognizer.js';
- import TextRegionCropper from './textRegionCropper.js';
/**
 * Coordinates the per-region text recognition pipeline: crop each detected
 * text box, classify its direction, rotate it when the classifier says it is
 * upside down, run recognition, and map the box back into the coordinate
 * space of the original image.
 */
class RecognitionProcessor {
  constructor() {
    this.recSession = null;
    this.clsSession = null;
    this.config = null;
    this.textDirectionClassifier = new TextDirectionClassifier();
    this.textRecognizer = new TextRecognizer();
    this.textRegionCropper = new TextRegionCropper();
  }

  /**
   * Stores the sessions and configuration and forwards them to the
   * direction classifier and the recognizer.
   *
   * @param {*} recSession - Recognition session handed to the text recognizer.
   * @param {*} clsSession - Classification session handed to the direction classifier.
   * @param {object} config - Shared configuration; `clsThresh` is read during recognition.
   */
  initialize(recSession, clsSession, config) {
    this.recSession = recSession;
    this.clsSession = clsSession;
    this.config = config;
    this.textDirectionClassifier.initialize(clsSession, config);
    this.textRecognizer.initialize(recSession, config);
  }

  /**
   * Delegates loading of the character set to the text recognizer.
   *
   * @param {string} keysPath - Location of the character set, passed through unchanged.
   * @returns {Promise<void>}
   */
  async loadCharacterSet(keysPath) {
    await this.textRecognizer.loadCharacterSet(keysPath);
  }

  /**
   * @returns {*} Whatever the text recognizer reports as its character set size.
   */
  getCharacterSetSize() {
    return this.textRecognizer.getCharacterSetSize();
  }

  /**
   * Runs crop -> direction classification -> optional 180° rotation ->
   * recognition for every text box, one region after another.
   *
   * A failure while processing a single region is logged and that region is
   * skipped; the remaining regions are still processed. Only failures outside
   * the per-region handling (for example `textBoxes` not being array-like)
   * are logged and rethrown.
   *
   * @param {object} processedImage - Must expose `buffer` plus the fields read by
   *   `scaleBoxToOriginalImage`.
   * @param {Array<object>} textBoxes - Detected boxes; each is cropped and, on success,
   *   converted with `scaleBoxToOriginalImage`.
   * @returns {Promise<Array<object>>} One entry per accepted region with `text`,
   *   `confidence`, `box`, `clsResult`, `clsConfidence` and 1-based `regionIndex`.
   */
  async recognizeTextWithCls(processedImage, textBoxes) {
    // Recognition results at or below this confidence are discarded.
    const MIN_RECOGNITION_CONFIDENCE = 0.05;
    const results = [];

    try {
      console.log(`🔄 开始处理 ${textBoxes.length} 个文本区域`);

      for (let i = 0; i < textBoxes.length; i++) {
        const regionIndex = i + 1;
        const box = textBoxes[i];

        try {
          // Format defensively: a box without a numeric confidence must not
          // abort the region just because the progress log cannot be rendered.
          const boxConfidence =
            typeof box.confidence === 'number' ? box.confidence.toFixed(4) : 'n/a';
          console.log(`\n📦 处理区域 ${regionIndex}/${textBoxes.length}, 置信度: ${boxConfidence}`);

          const textRegion = await this.textRegionCropper.cropTextRegion(
            processedImage.buffer,
            box,
            regionIndex
          );
          if (!textRegion) {
            console.log(`⏭️ 区域 ${regionIndex}: 跳过无效区域`);
            continue;
          }

          const { clsResult, clsConfidence } =
            await this.textDirectionClassifier.classifyTextDirection(textRegion.buffer);

          // Rotate only when the classifier reports 180° with enough confidence.
          let recognitionImage = textRegion.buffer;
          if (clsResult === 180 && clsConfidence > this.config.clsThresh) {
            console.log(`🔄 区域 ${regionIndex}: 旋转 180°`);
            recognitionImage = await this.textRegionCropper.rotateImage(textRegion.buffer, 180);
          }

          const textResult = await this.textRecognizer.recognizeText(recognitionImage, regionIndex);
          const trimmedText = textResult.text ? textResult.text.trim() : '';
          const isAccepted =
            trimmedText.length > 0 && textResult.confidence > MIN_RECOGNITION_CONFIDENCE;

          if (!isAccepted) {
            console.log(`❌ 区域 ${regionIndex}: 识别失败或置信度过低`);
            continue;
          }

          results.push({
            text: trimmedText,
            // Reported confidence is the recognition score weighted by the
            // direction classifier's score.
            confidence: textResult.confidence * clsConfidence,
            box: this.scaleBoxToOriginalImage(box, processedImage),
            clsResult,
            clsConfidence,
            regionIndex
          });
          console.log(`✅ 区域 ${regionIndex}: 识别成功 "${textResult.text}"`);
        } catch (error) {
          // Best effort: one bad region must not stop the rest.
          console.error(`💥 区域 ${regionIndex}: 处理失败`, error.message);
        }
      }

      console.log(`\n🎯 识别完成: ${results.length}/${textBoxes.length} 个区域成功`);
      return results;
    } catch (error) {
      console.error('❌ 整体识别失败:', error);
      throw error;
    }
  }

  /**
   * Maps a box into original-image coordinates: each coordinate is multiplied
   * by the scale factor, the padding is subtracted, and the result is clamped
   * to `[0, originalWidth - 1]` / `[0, originalHeight - 1]`.
   *
   * Only corners 1 and 3 of the input (`x1, y1, x3, y3`) are read; the returned
   * box is the axis-aligned rectangle they span, so any `x2/y2/x4/y4` on the
   * input are ignored.
   *
   * @param {object} box - Reads `x1`, `y1`, `x3`, `y3` and `confidence`.
   * @param {object} processedImage - Reads `scaleX`, `scaleY`, `paddingX`, `paddingY`,
   *   `originalWidth` and `originalHeight`.
   * @returns {object} `{ x1, y1, x2, y2, x3, y3, x4, y4, confidence }`.
   */
  scaleBoxToOriginalImage(box, processedImage) {
    const {
      scaleX, scaleY,
      paddingX, paddingY,
      originalWidth, originalHeight
    } = processedImage;

    const clamp = (value, max) => Math.max(0, Math.min(max, value));
    const toOriginalX = (x) => clamp(x * scaleX - paddingX, originalWidth - 1);
    const toOriginalY = (y) => clamp(y * scaleY - paddingY, originalHeight - 1);

    const left = toOriginalX(box.x1);
    const top = toOriginalY(box.y1);
    const right = toOriginalX(box.x3);
    const bottom = toOriginalY(box.y3);

    return {
      x1: left,
      y1: top,
      x2: right,
      y2: top,
      x3: right,
      y3: bottom,
      x4: left,
      y4: bottom,
      confidence: box.confidence
    };
  }
}
- export default RecognitionProcessor;
|