|
@@ -87,7 +87,6 @@ class TextRecognizer {
|
|
|
throw new Error(`字符集加载失败: ${error.message}`);
|
|
throw new Error(`字符集加载失败: ${error.message}`);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
// Returns `this.characterSet.length`, i.e. the number of entries in the loaded character set.
// NOTE(review): presumably `characterSet` is an array or string of characters — confirm against the loader.
// NOTE(review): this span comes from a flattened side-by-side diff view, so each unchanged row
// appears twice with `|` separator rows in between; the method itself is a single-statement getter.
getCharacterSetSize() {
|
|
getCharacterSetSize() {
|
|
|
return this.characterSet.length;
|
|
return this.characterSet.length;
|
|
|
}
|
|
}
|
|
@@ -165,32 +164,79 @@ class TextRecognizer {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+// server/utils/textRecognizer.js
|
|
|
|
|
+// 增强的图像预处理
|
|
|
|
|
+
|
|
|
async applySmartPreprocessing(buffer, meanBrightness, stdDev, regionIndex = 0) {
|
|
async applySmartPreprocessing(buffer, meanBrightness, stdDev, regionIndex = 0) {
|
|
|
let processedBuffer = buffer;
|
|
let processedBuffer = buffer;
|
|
|
|
|
|
|
|
- if (meanBrightness > 200 && stdDev < 30) {
|
|
|
|
|
- this.logger.debug(`区域 ${regionIndex}: 应用高亮度图像增强`);
|
|
|
|
|
- processedBuffer = await sharp(buffer)
|
|
|
|
|
- .linear(1.5, -50)
|
|
|
|
|
- .normalize()
|
|
|
|
|
- .grayscale()
|
|
|
|
|
- .toBuffer();
|
|
|
|
|
- } else if (meanBrightness < 80) {
|
|
|
|
|
- this.logger.debug(`区域 ${regionIndex}: 应用低亮度图像增强`);
|
|
|
|
|
- processedBuffer = await sharp(buffer)
|
|
|
|
|
- .linear(1.2, 30)
|
|
|
|
|
- .normalize()
|
|
|
|
|
- .grayscale()
|
|
|
|
|
- .toBuffer();
|
|
|
|
|
- } else if (stdDev < 20) {
|
|
|
|
|
- this.logger.debug(`区域 ${regionIndex}: 应用低对比度增强`);
|
|
|
|
|
- processedBuffer = await sharp(buffer)
|
|
|
|
|
- .linear(1.3, -20)
|
|
|
|
|
- .normalize()
|
|
|
|
|
|
|
+ try {
|
|
|
|
|
+ // 更精细的图像分析
|
|
|
|
|
+ const stats = await sharp(buffer)
|
|
|
.grayscale()
|
|
.grayscale()
|
|
|
|
|
+ .stats();
|
|
|
|
|
+
|
|
|
|
|
+ const median = stats.channels[0].median;
|
|
|
|
|
+ const max = stats.channels[0].max;
|
|
|
|
|
+ const min = stats.channels[0].min;
|
|
|
|
|
+
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 详细统计 - 中值=${median}, 范围=${min}-${max}, 均值=${meanBrightness.toFixed(1)}, 标准差=${stdDev.toFixed(1)}`);
|
|
|
|
|
+
|
|
|
|
|
+ // 更智能的预处理策略
|
|
|
|
|
+ if (meanBrightness > 220 && stdDev < 25) {
|
|
|
|
|
+ // 高亮度低对比度图像
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用高亮度低对比度增强`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(1.8, -80) // 更强的对比度增强
|
|
|
|
|
+ .normalize({ lower: 5, upper: 95 }) // 更激进的归一化
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ } else if (meanBrightness < 70) {
|
|
|
|
|
+ // 低亮度图像
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用低亮度增强`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(1.5, 50) // 更强的亮度提升
|
|
|
|
|
+ .normalize()
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ } else if (stdDev < 15) {
|
|
|
|
|
+ // 极低对比度
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用极低对比度增强`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(2.0, -30) // 非常强的对比度增强
|
|
|
|
|
+ .normalize({ lower: 1, upper: 99 })
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ } else if (stdDev > 80) {
|
|
|
|
|
+ // 高对比度图像,可能过度增强
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用高对比度抑制`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(0.8, 20) // 降低对比度
|
|
|
|
|
+ .normalize()
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // 标准处理
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用标准增强`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(1.3, -15) // 适度的对比度增强
|
|
|
|
|
+ .normalize({ lower: 10, upper: 90 })
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 应用锐化滤波增强文字边缘
|
|
|
|
|
+ processedBuffer = await sharp(processedBuffer)
|
|
|
|
|
+ .sharpen({
|
|
|
|
|
+ sigma: 1.2,
|
|
|
|
|
+ m1: 1.5,
|
|
|
|
|
+ m2: 0.7
|
|
|
|
|
+ })
|
|
|
.toBuffer();
|
|
.toBuffer();
|
|
|
- } else {
|
|
|
|
|
- this.logger.debug(`区域 ${regionIndex}: 应用标准化灰度处理`);
|
|
|
|
|
|
|
+
|
|
|
|
|
+ } catch (error) {
|
|
|
|
|
+ this.logger.error(`区域 ${regionIndex}: 预处理失败`, error);
|
|
|
|
|
+ // 回退到基本处理
|
|
|
processedBuffer = await sharp(buffer)
|
|
processedBuffer = await sharp(buffer)
|
|
|
.normalize()
|
|
.normalize()
|
|
|
.grayscale()
|
|
.grayscale()
|
|
@@ -346,88 +392,114 @@ class TextRecognizer {
|
|
|
const baseThreshold = 0.03;
|
|
const baseThreshold = 0.03;
|
|
|
let confidenceThreshold = baseThreshold;
|
|
let confidenceThreshold = baseThreshold;
|
|
|
|
|
|
|
|
- // 先分析整个序列的置信度分布
|
|
|
|
|
|
|
+ // 分析序列置信度分布
|
|
|
let maxSequenceProb = 0;
|
|
let maxSequenceProb = 0;
|
|
|
|
|
+ let minSequenceProb = 1;
|
|
|
|
|
+ let sumProb = 0;
|
|
|
|
|
+ let probCount = 0;
|
|
|
|
|
+
|
|
|
for (let t = 0; t < seqLen; t++) {
|
|
for (let t = 0; t < seqLen; t++) {
|
|
|
for (let i = 0; i < vocabSize; i++) {
|
|
for (let i = 0; i < vocabSize; i++) {
|
|
|
- maxSequenceProb = Math.max(maxSequenceProb, data[t * vocabSize + i]);
|
|
|
|
|
|
|
+ const prob = data[t * vocabSize + i];
|
|
|
|
|
+ if (prob > 0.01) { // 只统计有意义的概率
|
|
|
|
|
+ maxSequenceProb = Math.max(maxSequenceProb, prob);
|
|
|
|
|
+ minSequenceProb = Math.min(minSequenceProb, prob);
|
|
|
|
|
+ sumProb += prob;
|
|
|
|
|
+ probCount++;
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // 如果整体置信度较低,降低阈值
|
|
|
|
|
- if (maxSequenceProb < 0.5) {
|
|
|
|
|
|
|
+ const avgProb = probCount > 0 ? sumProb / probCount : 0;
|
|
|
|
|
+
|
|
|
|
|
+ // 根据序列特性动态调整阈值
|
|
|
|
|
+ if (avgProb < 0.3) {
|
|
|
confidenceThreshold = baseThreshold * 0.5;
|
|
confidenceThreshold = baseThreshold * 0.5;
|
|
|
|
|
+ } else if (avgProb > 0.7) {
|
|
|
|
|
+ confidenceThreshold = baseThreshold * 1.5;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- this.logger.debug(`使用解码阈值: ${confidenceThreshold.toFixed(4)}`);
|
|
|
|
|
|
|
+ this.logger.debug(`序列统计: 平均概率=${avgProb.toFixed(4)}, 使用解码阈值: ${confidenceThreshold.toFixed(4)}`);
|
|
|
|
|
+
|
|
|
|
|
+ // 改进的beam search算法
|
|
|
|
|
+ const beamWidth = 5;
|
|
|
|
|
+ let beams = [{ text: '', confidence: 1.0, lastChar: -1 }];
|
|
|
|
|
|
|
|
for (let t = 0; t < seqLen; t++) {
|
|
for (let t = 0; t < seqLen; t++) {
|
|
|
- let maxProb = -1;
|
|
|
|
|
- let maxIndex = -1;
|
|
|
|
|
|
|
+ const newBeams = [];
|
|
|
|
|
|
|
|
- // 找到当前时间步的最大概率字符
|
|
|
|
|
|
|
+ // 获取当前时间步的top-k字符
|
|
|
|
|
+ const topK = [];
|
|
|
for (let i = 0; i < vocabSize; i++) {
|
|
for (let i = 0; i < vocabSize; i++) {
|
|
|
const prob = data[t * vocabSize + i];
|
|
const prob = data[t * vocabSize + i];
|
|
|
- if (prob > maxProb) {
|
|
|
|
|
- maxProb = prob;
|
|
|
|
|
- maxIndex = i;
|
|
|
|
|
|
|
+ if (prob > confidenceThreshold) {
|
|
|
|
|
+ topK.push({ index: i, prob });
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // 改进的CTC解码逻辑
|
|
|
|
|
- if (maxIndex > 0 && maxProb > confidenceThreshold) {
|
|
|
|
|
- const charIndex = maxIndex - 1;
|
|
|
|
|
- if (charIndex < this.characterSet.length) {
|
|
|
|
|
- const char = this.characterSet[charIndex];
|
|
|
|
|
-
|
|
|
|
|
- // 更智能的重复字符处理
|
|
|
|
|
- const shouldAddChar = maxIndex !== lastCharIndex ||
|
|
|
|
|
- maxProb > 0.8 ||
|
|
|
|
|
- (maxIndex === lastCharIndex && charCount > 0 && text[text.length - 1] !== char);
|
|
|
|
|
-
|
|
|
|
|
- if (shouldAddChar && char && char.trim() !== '') {
|
|
|
|
|
- text += char;
|
|
|
|
|
- confidenceSum += maxProb;
|
|
|
|
|
- charCount++;
|
|
|
|
|
|
|
+ // 按概率排序
|
|
|
|
|
+ topK.sort((a, b) => b.prob - a.prob);
|
|
|
|
|
+ const candidates = topK.slice(0, beamWidth);
|
|
|
|
|
+
|
|
|
|
|
+ // 为每个beam扩展候选字符
|
|
|
|
|
+ for (const beam of beams) {
|
|
|
|
|
+ for (const candidate of candidates) {
|
|
|
|
|
+ const charIndex = candidate.index;
|
|
|
|
|
+
|
|
|
|
|
+ if (charIndex === 0) {
|
|
|
|
|
+ // 空白字符
|
|
|
|
|
+ newBeams.push({
|
|
|
|
|
+ text: beam.text,
|
|
|
|
|
+ confidence: beam.confidence,
|
|
|
|
|
+ lastChar: -1
|
|
|
|
|
+ });
|
|
|
|
|
+ } else {
|
|
|
|
|
+ const actualCharIndex = charIndex - 1;
|
|
|
|
|
+ if (actualCharIndex < this.characterSet.length) {
|
|
|
|
|
+ const char = this.characterSet[actualCharIndex];
|
|
|
|
|
+ let newText = beam.text;
|
|
|
|
|
+
|
|
|
|
|
+ // 处理重复字符
|
|
|
|
|
+ if (charIndex !== beam.lastChar) {
|
|
|
|
|
+ newText += char;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ newBeams.push({
|
|
|
|
|
+ text: newText,
|
|
|
|
|
+ confidence: beam.confidence * candidate.prob,
|
|
|
|
|
+ lastChar: charIndex
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
- lastCharIndex = maxIndex;
|
|
|
|
|
- } else {
|
|
|
|
|
- this.logger.warn(`字符索引${charIndex}超出字符集范围(0-${this.characterSet.length-1})`);
|
|
|
|
|
}
|
|
}
|
|
|
- } else if (maxIndex === 0) {
|
|
|
|
|
- lastCharIndex = -1;
|
|
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ // 选择top beamWidth个beam
|
|
|
|
|
+ newBeams.sort((a, b) => b.confidence - a.confidence);
|
|
|
|
|
+ beams = newBeams.slice(0, beamWidth);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- const avgConfidence = charCount > 0 ? confidenceSum / charCount : 0;
|
|
|
|
|
|
|
+ // 选择最佳beam
|
|
|
|
|
+ if (beams.length > 0) {
|
|
|
|
|
+ const bestBeam = beams[0];
|
|
|
|
|
+ text = bestBeam.text;
|
|
|
|
|
|
|
|
- // 基本的文本清理(不包含错误模式修复)
|
|
|
|
|
- const cleanedText = this.basicTextCleaning(text);
|
|
|
|
|
|
|
+ // 计算平均置信度(几何平均)
|
|
|
|
|
+ const textLength = text.length;
|
|
|
|
|
+ if (textLength > 0) {
|
|
|
|
|
+ confidenceSum = Math.pow(bestBeam.confidence, 1 / textLength);
|
|
|
|
|
+ charCount = textLength;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ const avgConfidence = charCount > 0 ? confidenceSum : 0;
|
|
|
|
|
|
|
|
return {
|
|
return {
|
|
|
- text: cleanedText,
|
|
|
|
|
|
|
+ text: text,
|
|
|
confidence: avgConfidence
|
|
confidence: avgConfidence
|
|
|
};
|
|
};
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// basicTextCleaning(text): light clean-up of a recognized text string.
//   - returns '' when `text` is falsy;
//   - shortens any run of three or more identical non-digit characters to two;
//   - replaces the characters matched by the three punctuation regexes below with the
//     replacement strings given next to them;
//   - turns digits followed by a doubled percent sign into digits followed by one;
//   - returns the result with leading/trailing whitespace trimmed.
// NOTE(review): every row of this method carries the `-` diff marker, i.e. the diff this
// span was taken from appears to remove the method — confirm before relying on it.
- basicTextCleaning(text) {
|
|
|
|
|
- if (!text) return '';
|
|
|
|
|
-
|
|
|
|
|
- let cleaned = text;
|
|
|
|
|
-
|
|
|
|
|
- // 1. Remove excessive repeated characters (keep a reasonable amount of repetition)
|
|
|
|
|
- cleaned = cleaned.replace(/([^0-9])\1{2,}/g, '$1$1');
|
|
|
|
|
-
|
|
|
|
|
- // 2. Fix punctuation marks
|
|
|
|
|
- cleaned = cleaned.replace(/∶/g, ':')
|
|
|
|
|
- .replace(/《/g, '(')
|
|
|
|
|
- .replace(/》/g, ')');
|
|
|
|
|
-
|
|
|
|
|
- // 3. Fix digits followed by a doubled percent sign
|
|
|
|
|
- cleaned = cleaned.replace(/(\d+)%%/g, '$1%');
|
|
|
|
|
-
|
|
|
|
|
- return cleaned.trim();
|
|
|
|
|
- }
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
export default TextRecognizer;
|
|
export default TextRecognizer;
|