hai 3 meses · bb4d2159e4
--- a/server/utils/textRecognizer.js
+++ b/server/utils/textRecognizer.js
@@ -87,7 +87,6 @@ class TextRecognizer {
 
				             throw new Error(`字符集加载失败: ${error.message}`);
			
 
				         }
			
 
				     }
			
 
				-
			
 
				     /** Returns the size of the recognizer's character set, i.e. `this.characterSet.length`. */
				     getCharacterSetSize() {
			
 
				         return this.characterSet.length;
			
 
				     }
			
@@ -165,32 +164,79 @@ class TextRecognizer {
 
				         }
			
 
				     }
			
 
				 
			
 
				+// server/utils/textRecognizer.js
			
 
				+// 增强的图像预处理
			
 
				+
			
 
				     async applySmartPreprocessing(buffer, meanBrightness, stdDev, regionIndex = 0) {
			
 
				         let processedBuffer = buffer;
			
 
				 
			
 
				-        if (meanBrightness > 200 && stdDev < 30) {
			
 
				-            this.logger.debug(`区域 ${regionIndex}: 应用高亮度图像增强`);
			
 
				-            processedBuffer = await sharp(buffer)
			
 
				-                .linear(1.5, -50)
			
 
				-                .normalize()
			
 
				-                .grayscale()
			
 
				-                .toBuffer();
			
 
				-        } else if (meanBrightness < 80) {
			
 
				-            this.logger.debug(`区域 ${regionIndex}: 应用低亮度图像增强`);
			
 
				-            processedBuffer = await sharp(buffer)
			
 
				-                .linear(1.2, 30)
			
 
				-                .normalize()
			
 
				-                .grayscale()
			
 
				-                .toBuffer();
			
 
				-        } else if (stdDev < 20) {
			
 
				-            this.logger.debug(`区域 ${regionIndex}: 应用低对比度增强`);
			
 
				-            processedBuffer = await sharp(buffer)
			
 
				-                .linear(1.3, -20)
			
 
				-                .normalize()
			
 
				+        try {
			
 
				+            // 更精细的图像分析
			
 
				+            const stats = await sharp(buffer)
			
 
				                 .grayscale()
			
 
				+                .stats();
			
 
				+
			
 
				+            const median = stats.channels[0].median;
			
 
				+            const max = stats.channels[0].max;
			
 
				+            const min = stats.channels[0].min;
			
 
				+
			
 
				+            this.logger.debug(`区域 ${regionIndex}: 详细统计 - 中值=${median}, 范围=${min}-${max}, 均值=${meanBrightness.toFixed(1)}, 标准差=${stdDev.toFixed(1)}`);
			
 
				+
			
 
				+            // 更智能的预处理策略
			
 
				+            if (meanBrightness > 220 && stdDev < 25) {
			
 
				+                // 高亮度低对比度图像
			
 
				+                this.logger.debug(`区域 ${regionIndex}: 应用高亮度低对比度增强`);
			
 
				+                processedBuffer = await sharp(buffer)
			
 
				+                    .linear(1.8, -80)  // 更强的对比度增强
			
 
				+                    .normalize({ lower: 5, upper: 95 }) // 更激进的归一化
			
 
				+                    .grayscale()
			
 
				+                    .toBuffer();
			
 
				+            } else if (meanBrightness < 70) {
			
 
				+                // 低亮度图像
			
 
				+                this.logger.debug(`区域 ${regionIndex}: 应用低亮度增强`);
			
 
				+                processedBuffer = await sharp(buffer)
			
 
				+                    .linear(1.5, 50)   // 更强的亮度提升
			
 
				+                    .normalize()
			
 
				+                    .grayscale()
			
 
				+                    .toBuffer();
			
 
				+            } else if (stdDev < 15) {
			
 
				+                // 极低对比度
			
 
				+                this.logger.debug(`区域 ${regionIndex}: 应用极低对比度增强`);
			
 
				+                processedBuffer = await sharp(buffer)
			
 
				+                    .linear(2.0, -30)  // 非常强的对比度增强
			
 
				+                    .normalize({ lower: 1, upper: 99 })
			
 
				+                    .grayscale()
			
 
				+                    .toBuffer();
			
 
				+            } else if (stdDev > 80) {
			
 
				+                // 高对比度图像，可能过度增强
			
 
				+                this.logger.debug(`区域 ${regionIndex}: 应用高对比度抑制`);
			
 
				+                processedBuffer = await sharp(buffer)
			
 
				+                    .linear(0.8, 20)   // 降低对比度
			
 
				+                    .normalize()
			
 
				+                    .grayscale()
			
 
				+                    .toBuffer();
			
 
				+            } else {
			
 
				+                // 标准处理
			
 
				+                this.logger.debug(`区域 ${regionIndex}: 应用标准增强`);
			
 
				+                processedBuffer = await sharp(buffer)
			
 
				+                    .linear(1.3, -15)  // 适度的对比度增强
			
 
				+                    .normalize({ lower: 10, upper: 90 })
			
 
				+                    .grayscale()
			
 
				+                    .toBuffer();
			
 
				+            }
			
 
				+
			
 
				+            // 应用锐化滤波增强文字边缘
			
 
				+            processedBuffer = await sharp(processedBuffer)
			
 
				+                .sharpen({
			
 
				+                    sigma: 1.2,
			
 
				+                    m1: 1.5,
			
 
				+                    m2: 0.7
			
 
				+                })
			
 
				                 .toBuffer();
			
 
				-        } else {
			
 
				-            this.logger.debug(`区域 ${regionIndex}: 应用标准化灰度处理`);
			
 
				+
			
 
				+        } catch (error) {
			
 
				+            this.logger.error(`区域 ${regionIndex}: 预处理失败`, error);
			
 
				+            // 回退到基本处理
			
 
				             processedBuffer = await sharp(buffer)
			
 
				                 .normalize()
			
 
				                 .grayscale()
			
@@ -346,88 +392,114 @@ class TextRecognizer {
 
				         const baseThreshold = 0.03;
			
 
				         let confidenceThreshold = baseThreshold;
			
 
				 
			
 
				-        // 先分析整个序列的置信度分布
			
 
				+        // 分析序列置信度分布
			
 
				         let maxSequenceProb = 0;
			
 
				+        let minSequenceProb = 1;
			
 
				+        let sumProb = 0;
			
 
				+        let probCount = 0;
			
 
				+
			
 
				         for (let t = 0; t < seqLen; t++) {
			
 
				             for (let i = 0; i < vocabSize; i++) {
			
 
				-                maxSequenceProb = Math.max(maxSequenceProb, data[t * vocabSize + i]);
			
 
				+                const prob = data[t * vocabSize + i];
			
 
				+                if (prob > 0.01) { // 只统计有意义的概率
			
 
				+                    maxSequenceProb = Math.max(maxSequenceProb, prob);
			
 
				+                    minSequenceProb = Math.min(minSequenceProb, prob);
			
 
				+                    sumProb += prob;
			
 
				+                    probCount++;
			
 
				+                }
			
 
				             }
			
 
				         }
			
 
				 
			
 
				-        // 如果整体置信度较低，降低阈值
			
 
				-        if (maxSequenceProb < 0.5) {
			
 
				+        const avgProb = probCount > 0 ? sumProb / probCount : 0;
			
 
				+
			
 
				+        // 根据序列特性动态调整阈值
			
 
				+        if (avgProb < 0.3) {
			
 
				             confidenceThreshold = baseThreshold * 0.5;
			
 
				+        } else if (avgProb > 0.7) {
			
 
				+            confidenceThreshold = baseThreshold * 1.5;
			
 
				         }
			
 
				 
			
 
				-        this.logger.debug(`使用解码阈值: ${confidenceThreshold.toFixed(4)}`);
			
 
				+        this.logger.debug(`序列统计: 平均概率=${avgProb.toFixed(4)}, 使用解码阈值: ${confidenceThreshold.toFixed(4)}`);
			
 
				+
			
 
				+        // 改进的beam search算法
			
 
				+        const beamWidth = 5;
			
 
				+        let beams = [{ text: '', confidence: 1.0, lastChar: -1 }];
			
 
				 
			
 
				         for (let t = 0; t < seqLen; t++) {
			
 
				-            let maxProb = -1;
			
 
				-            let maxIndex = -1;
			
 
				+            const newBeams = [];
			
 
				 
			
 
				-            // 找到当前时间步的最大概率字符
			
 
				+            // 获取当前时间步的top-k字符
			
 
				+            const topK = [];
			
 
				             for (let i = 0; i < vocabSize; i++) {
			
 
				                 const prob = data[t * vocabSize + i];
			
 
				-                if (prob > maxProb) {
			
 
				-                    maxProb = prob;
			
 
				-                    maxIndex = i;
			
 
				+                if (prob > confidenceThreshold) {
			
 
				+                    topK.push({ index: i, prob });
			
 
				                 }
			
 
				             }
			
 
				 
			
 
				-            // 改进的CTC解码逻辑
			
 
				-            if (maxIndex > 0 && maxProb > confidenceThreshold) {
			
 
				-                const charIndex = maxIndex - 1;
			
 
				-                if (charIndex < this.characterSet.length) {
			
 
				-                    const char = this.characterSet[charIndex];
			
 
				-
			
 
				-                    // 更智能的重复字符处理
			
 
				-                    const shouldAddChar = maxIndex !== lastCharIndex ||
			
 
				-                        maxProb > 0.8 ||
			
 
				-                        (maxIndex === lastCharIndex && charCount > 0 && text[text.length - 1] !== char);
			
 
				-
			
 
				-                    if (shouldAddChar && char && char.trim() !== '') {
			
 
				-                        text += char;
			
 
				-                        confidenceSum += maxProb;
			
 
				-                        charCount++;
			
 
				+            // 按概率排序
			
 
				+            topK.sort((a, b) => b.prob - a.prob);
			
 
				+            const candidates = topK.slice(0, beamWidth);
			
 
				+
			
 
				+            // 为每个beam扩展候选字符
			
 
				+            for (const beam of beams) {
			
 
				+                for (const candidate of candidates) {
			
 
				+                    const charIndex = candidate.index;
			
 
				+
			
 
				+                    if (charIndex === 0) {
			
 
				+                        // 空白字符
			
 
				+                        newBeams.push({
			
 
				+                            text: beam.text,
			
 
				+                            confidence: beam.confidence,
			
 
				+                            lastChar: -1
			
 
				+                        });
			
 
				+                    } else {
			
 
				+                        const actualCharIndex = charIndex - 1;
			
 
				+                        if (actualCharIndex < this.characterSet.length) {
			
 
				+                            const char = this.characterSet[actualCharIndex];
			
 
				+                            let newText = beam.text;
			
 
				+
			
 
				+                            // 处理重复字符
			
 
				+                            if (charIndex !== beam.lastChar) {
			
 
				+                                newText += char;
			
 
				+                            }
			
 
				+
			
 
				+                            newBeams.push({
			
 
				+                                text: newText,
			
 
				+                                confidence: beam.confidence * candidate.prob,
			
 
				+                                lastChar: charIndex
			
 
				+                            });
			
 
				+                        }
			
 
				                     }
			
 
				-                    lastCharIndex = maxIndex;
			
 
				-                } else {
			
 
				-                    this.logger.warn(`字符索引${charIndex}超出字符集范围(0-${this.characterSet.length-1})`);
			
 
				                 }
			
 
				-            } else if (maxIndex === 0) {
			
 
				-                lastCharIndex = -1;
			
 
				             }
			
 
				+
			
 
				+            // 选择top beamWidth个beam
			
 
				+            newBeams.sort((a, b) => b.confidence - a.confidence);
			
 
				+            beams = newBeams.slice(0, beamWidth);
			
 
				         }
			
 
				 
			
 
				-        const avgConfidence = charCount > 0 ? confidenceSum / charCount : 0;
			
 
				+        // 选择最佳beam
			
 
				+        if (beams.length > 0) {
			
 
				+            const bestBeam = beams[0];
			
 
				+            text = bestBeam.text;
			
 
				 
			
 
				-        // 基本的文本清理（不包含错误模式修复）
			
 
				-        const cleanedText = this.basicTextCleaning(text);
			
 
				+            // 计算平均置信度（几何平均）
			
 
				+            const textLength = text.length;
			
 
				+            if (textLength > 0) {
			
 
				+                confidenceSum = Math.pow(bestBeam.confidence, 1 / textLength);
			
 
				+                charCount = textLength;
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        const avgConfidence = charCount > 0 ? confidenceSum : 0;
			
 
				 
			
 
				         return {
			
 
				-            text: cleanedText,
			
 
				+            text: text,
			
 
				             confidence: avgConfidence
			
 
				         };
			
 
				     }
			
 
				 
			
 
				-    basicTextCleaning(text) {
			
 
				-        if (!text) return '';
			
 
				-
			
 
				-        let cleaned = text;
			
 
				-
			
 
				-        // 1. 移除过多的重复字符（保留合理的重复）
			
 
				-        cleaned = cleaned.replace(/([^0-9])\1{2,}/g, '$1$1');
			
 
				-
			
 
				-        // 2. 修复标点符号
			
 
				-        cleaned = cleaned.replace(/∶/g, '：')
			
 
				-            .replace(/《/g, '（')
			
 
				-            .replace(/》/g, '）');
			
 
				-
			
 
				-        // 3. 修复数字和百分号
			
 
				-        cleaned = cleaned.replace(/(\d+)%%/g, '$1%');
			
 
				-
			
 
				-        return cleaned.trim();
			
 
				-    }
			
 
				 }
			
 
				 
			
 
				 export default TextRecognizer;