|
@@ -10,12 +10,23 @@ class TextRecognizer {
|
|
|
this.config = null;
|
|
this.config = null;
|
|
|
this.characterSet = [];
|
|
this.characterSet = [];
|
|
|
this.debugDir = path.join(process.cwd(), 'temp', 'debug');
|
|
this.debugDir = path.join(process.cwd(), 'temp', 'debug');
|
|
|
|
|
+ this.preprocessedDir = path.join(process.cwd(), 'temp', 'preprocessed');
|
|
|
|
|
+ this.logger = {
|
|
|
|
|
+ info: (msg, ...args) => console.log(`🔤 [识别] ${msg}`, ...args),
|
|
|
|
|
+ error: (msg, ...args) => console.error(`❌ [识别] ${msg}`, ...args),
|
|
|
|
|
+ debug: (msg, ...args) => console.log(`🐛 [识别] ${msg}`, ...args),
|
|
|
|
|
+ warn: (msg, ...args) => console.warn(`🐛 [识别] ${msg}`, ...args)
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ // 确保目录存在
|
|
|
fse.ensureDirSync(this.debugDir);
|
|
fse.ensureDirSync(this.debugDir);
|
|
|
|
|
+ fse.ensureDirSync(this.preprocessedDir);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
initialize(recSession, config) {
|
|
initialize(recSession, config) {
|
|
|
this.recSession = recSession;
|
|
this.recSession = recSession;
|
|
|
this.config = config;
|
|
this.config = config;
|
|
|
|
|
+ this.logger.info('文本识别器初始化完成');
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
async loadCharacterSet(keysPath) {
|
|
async loadCharacterSet(keysPath) {
|
|
@@ -24,346 +35,398 @@ class TextRecognizer {
|
|
|
this.characterSet = [];
|
|
this.characterSet = [];
|
|
|
const lines = keysContent.split('\n');
|
|
const lines = keysContent.split('\n');
|
|
|
|
|
|
|
|
|
|
+ // 使用提供的字符集文件
|
|
|
|
|
+ const uniqueChars = new Set();
|
|
|
|
|
+
|
|
|
for (const line of lines) {
|
|
for (const line of lines) {
|
|
|
const trimmed = line.trim();
|
|
const trimmed = line.trim();
|
|
|
|
|
+ // 跳过空行和注释行
|
|
|
if (trimmed && !trimmed.startsWith('#')) {
|
|
if (trimmed && !trimmed.startsWith('#')) {
|
|
|
- for (const char of trimmed) {
|
|
|
|
|
- if (char.trim() && !this.characterSet.includes(char)) {
|
|
|
|
|
- this.characterSet.push(char);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ // 将每行作为一个完整的字符处理
|
|
|
|
|
+ uniqueChars.add(trimmed);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ this.characterSet = Array.from(uniqueChars);
|
|
|
|
|
+
|
|
|
if (this.characterSet.length === 0) {
|
|
if (this.characterSet.length === 0) {
|
|
|
throw new Error('字符集文件为空或格式不正确');
|
|
throw new Error('字符集文件为空或格式不正确');
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- console.log(`✅ 字符集加载完成,共 ${this.characterSet.length} 个字符`);
|
|
|
|
|
|
|
+ this.logger.info(`字符集加载完成: ${this.characterSet.length}个字符`);
|
|
|
|
|
|
|
|
- } catch (error) {
|
|
|
|
|
- console.error('❌ 加载字符集失败,使用默认字符集:', error.message);
|
|
|
|
|
- this.characterSet = this.getDefaultCharacterSet();
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ // 记录字符集统计信息
|
|
|
|
|
+ const charTypes = {
|
|
|
|
|
+ chinese: 0,
|
|
|
|
|
+ english: 0,
|
|
|
|
|
+ digit: 0,
|
|
|
|
|
+ punctuation: 0,
|
|
|
|
|
+ other: 0
|
|
|
|
|
+ };
|
|
|
|
|
|
|
|
- getDefaultCharacterSet() {
|
|
|
|
|
- const defaultSet = [];
|
|
|
|
|
- for (let i = 0; i <= 9; i++) defaultSet.push(i.toString());
|
|
|
|
|
- for (let i = 97; i <= 122; i++) defaultSet.push(String.fromCharCode(i));
|
|
|
|
|
- for (let i = 65; i <= 90; i++) defaultSet.push(String.fromCharCode(i));
|
|
|
|
|
- defaultSet.push(...' ,。!?;:""()【】《》…—·'.split(''));
|
|
|
|
|
|
|
+ this.characterSet.forEach(char => {
|
|
|
|
|
+ if (/[\u4e00-\u9fff]/.test(char)) {
|
|
|
|
|
+ charTypes.chinese++;
|
|
|
|
|
+ } else if (/[a-zA-Z]/.test(char)) {
|
|
|
|
|
+ charTypes.english++;
|
|
|
|
|
+ } else if (/[0-9]/.test(char)) {
|
|
|
|
|
+ charTypes.digit++;
|
|
|
|
|
+ } else if (/[,。!?;:""()【】《》…—·]/.test(char)) {
|
|
|
|
|
+ charTypes.punctuation++;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ charTypes.other++;
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
|
|
|
- const commonChinese = '的一是不了在人有的我他这个们中来就时大地为子中你说道生国年着就那和要她出也得里后自以会家可下而过天去能对小多然于心学么之都好看起发当没成只如事把还用第样道想作种开美总从无情已面最女但现前些所同日手又行意动方期它头经长儿回位分爱老因很给名法间斯知世什两次使身者被高已亲其进此话常与活正感';
|
|
|
|
|
- for (const char of commonChinese) {
|
|
|
|
|
- defaultSet.push(char);
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ this.logger.debug(`字符集统计: 中文${charTypes.chinese}, 英文${charTypes.english}, 数字${charTypes.digit}, 标点${charTypes.punctuation}, 其他${charTypes.other}`);
|
|
|
|
|
+ this.logger.debug(`前20个字符: ${this.characterSet.slice(0, 20).join('')}`);
|
|
|
|
|
|
|
|
- console.log(`📝 使用默认字符集,共 ${defaultSet.length} 个字符`);
|
|
|
|
|
- return defaultSet;
|
|
|
|
|
|
|
+ } catch (error) {
|
|
|
|
|
+ this.logger.error('加载字符集失败', error.message);
|
|
|
|
|
+ // 完全使用提供的字符集,失败时抛出错误
|
|
|
|
|
+ throw new Error(`字符集加载失败: ${error.message}`);
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
getCharacterSetSize() {
|
|
getCharacterSetSize() {
|
|
|
return this.characterSet.length;
|
|
return this.characterSet.length;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- async recognizeText(textRegionBuffer) {
|
|
|
|
|
- console.log('🔠 === 开始文本识别流程 ===');
|
|
|
|
|
|
|
+ async recognizeText(textRegionBuffer, regionIndex = 0) {
|
|
|
|
|
+ const startTime = Date.now();
|
|
|
|
|
+ this.logger.info(`开始文本识别 - 区域 ${regionIndex}`);
|
|
|
|
|
|
|
|
try {
|
|
try {
|
|
|
- console.log('📥 1. 准备识别输入...');
|
|
|
|
|
- console.log(` - 输入图像大小: ${textRegionBuffer.length} 字节`);
|
|
|
|
|
-
|
|
|
|
|
- const inputTensor = await this.prepareRecognitionInput(textRegionBuffer);
|
|
|
|
|
- console.log('✅ 输入张量准备完成');
|
|
|
|
|
- console.log(` - 张量形状: [${inputTensor.dims.join(', ')}]`);
|
|
|
|
|
- console.log(` - 张量类型: ${inputTensor.type}`);
|
|
|
|
|
- console.log(` - 数据长度: ${inputTensor.data.length}`);
|
|
|
|
|
-
|
|
|
|
|
- // 数据验证
|
|
|
|
|
- const tensorData = inputTensor.data;
|
|
|
|
|
- let minVal = Infinity;
|
|
|
|
|
- let maxVal = -Infinity;
|
|
|
|
|
- let sumVal = 0;
|
|
|
|
|
- let validCount = 0;
|
|
|
|
|
-
|
|
|
|
|
- for (let i = 0; i < Math.min(100, tensorData.length); i++) {
|
|
|
|
|
- const val = tensorData[i];
|
|
|
|
|
- if (!isNaN(val) && isFinite(val)) {
|
|
|
|
|
- minVal = Math.min(minVal, val);
|
|
|
|
|
- maxVal = Math.max(maxVal, val);
|
|
|
|
|
- sumVal += val;
|
|
|
|
|
- validCount++;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- console.log(` - 数据范围: ${minVal.toFixed(4)} ~ ${maxVal.toFixed(4)}`);
|
|
|
|
|
- console.log(` - 数据均值: ${(sumVal / validCount).toFixed(4)}`);
|
|
|
|
|
-
|
|
|
|
|
- console.log('🧠 2. 执行模型推理...');
|
|
|
|
|
- const startInference = Date.now();
|
|
|
|
|
|
|
+ const inputTensor = await this.prepareRecognitionInput(textRegionBuffer, regionIndex);
|
|
|
const outputs = await this.recSession.run({ [this.recSession.inputNames[0]]: inputTensor });
|
|
const outputs = await this.recSession.run({ [this.recSession.inputNames[0]]: inputTensor });
|
|
|
- const inferenceTime = Date.now() - startInference;
|
|
|
|
|
- console.log(`✅ 模型推理完成 (${inferenceTime}ms)`);
|
|
|
|
|
-
|
|
|
|
|
- const outputNames = this.recSession.outputNames;
|
|
|
|
|
- console.log(` - 输出数量: ${outputNames.length}`);
|
|
|
|
|
-
|
|
|
|
|
- outputNames.forEach((name, index) => {
|
|
|
|
|
- const output = outputs[name];
|
|
|
|
|
- if (output) {
|
|
|
|
|
- console.log(` - 输出 ${index + 1} (${name}): 形状 [${output.dims.join(', ')}]`);
|
|
|
|
|
- }
|
|
|
|
|
- });
|
|
|
|
|
-
|
|
|
|
|
- console.log('🔍 3. 后处理识别结果...');
|
|
|
|
|
const result = this.postprocessRecognition(outputs);
|
|
const result = this.postprocessRecognition(outputs);
|
|
|
- console.log('✅ 后处理完成');
|
|
|
|
|
- console.log(` - 识别文本: "${result.text}"`);
|
|
|
|
|
- console.log(` - 置信度: ${result.confidence.toFixed(4)}`);
|
|
|
|
|
- console.log(` - 文本长度: ${result.text.length} 字符`);
|
|
|
|
|
|
|
|
|
|
- console.log('🎉 === 文本识别流程完成 ===');
|
|
|
|
|
|
|
+ const processingTime = Date.now() - startTime;
|
|
|
|
|
+ this.logger.info(`识别完成 - 区域 ${regionIndex}: "${result.text}", 置信度: ${result.confidence.toFixed(4)}, 耗时: ${processingTime}ms`);
|
|
|
|
|
+
|
|
|
return result;
|
|
return result;
|
|
|
|
|
|
|
|
} catch (error) {
|
|
} catch (error) {
|
|
|
- console.error('❌ 文本识别失败:');
|
|
|
|
|
- console.error(` - 错误信息: ${error.message}`);
|
|
|
|
|
|
|
+ this.logger.error(`文本识别失败 - 区域 ${regionIndex}`, error);
|
|
|
return { text: '', confidence: 0 };
|
|
return { text: '', confidence: 0 };
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- async prepareRecognitionInput(textRegionBuffer) {
|
|
|
|
|
- console.log(' 📝 准备识别输入详情:');
|
|
|
|
|
|
|
+ async prepareRecognitionInput(textRegionBuffer, regionIndex = 0) {
|
|
|
|
|
+ this.logger.debug(`准备识别输入 - 区域 ${regionIndex}`);
|
|
|
|
|
|
|
|
- try {
|
|
|
|
|
- const targetHeight = 48;
|
|
|
|
|
- const targetWidth = 320;
|
|
|
|
|
|
|
+ const targetHeight = 48;
|
|
|
|
|
+ const targetWidth = 320; // 原始目标宽度
|
|
|
|
|
+ const finalWidth = targetWidth + 20; // 最终宽度(左右各加10像素)
|
|
|
|
|
+ const timestamp = Date.now();
|
|
|
|
|
|
|
|
|
|
+ try {
|
|
|
const metadata = await sharp(textRegionBuffer).metadata();
|
|
const metadata = await sharp(textRegionBuffer).metadata();
|
|
|
- console.log(` - 原始图像尺寸: ${metadata.width}x${metadata.height}`);
|
|
|
|
|
|
|
+ this.logger.debug(`原始区域 ${regionIndex}: ${metadata.width}x${metadata.height}`);
|
|
|
|
|
|
|
|
- // 保存原始图像用于调试
|
|
|
|
|
- const originalPath = path.join(this.debugDir, `original-${Date.now()}.png`);
|
|
|
|
|
|
|
+ // 保存原始裁剪区域图像
|
|
|
|
|
+ const originalPath = path.join(this.preprocessedDir, `region-${regionIndex}-original-${timestamp}.png`);
|
|
|
await fse.writeFile(originalPath, textRegionBuffer);
|
|
await fse.writeFile(originalPath, textRegionBuffer);
|
|
|
|
|
+ this.logger.debug(`保存原始区域图像: ${originalPath}`);
|
|
|
|
|
|
|
|
- // 关键修复:正确的预处理流程
|
|
|
|
|
- let processedBuffer = textRegionBuffer;
|
|
|
|
|
-
|
|
|
|
|
- // 1. 分析图像特性
|
|
|
|
|
- const stats = await sharp(processedBuffer)
|
|
|
|
|
- .grayscale()
|
|
|
|
|
- .stats();
|
|
|
|
|
|
|
+ // 图像分析
|
|
|
|
|
+ const stats = await sharp(textRegionBuffer).grayscale().stats();
|
|
|
const meanBrightness = stats.channels[0].mean;
|
|
const meanBrightness = stats.channels[0].mean;
|
|
|
const stdDev = stats.channels[0].stdev;
|
|
const stdDev = stats.channels[0].stdev;
|
|
|
|
|
|
|
|
- console.log(` - 图像统计: 均值=${meanBrightness.toFixed(1)}, 标准差=${stdDev.toFixed(1)}`);
|
|
|
|
|
-
|
|
|
|
|
- // 2. 改进的预处理策略
|
|
|
|
|
- if (meanBrightness > 200 && stdDev < 30) {
|
|
|
|
|
- console.log(' - 检测到高亮度图像,进行对比度增强');
|
|
|
|
|
- processedBuffer = await sharp(processedBuffer)
|
|
|
|
|
- .linear(1.5, -50)
|
|
|
|
|
- .normalize()
|
|
|
|
|
- .grayscale()
|
|
|
|
|
- .toBuffer();
|
|
|
|
|
- } else if (meanBrightness < 80) {
|
|
|
|
|
- console.log(' - 检测到低亮度图像,进行亮度调整');
|
|
|
|
|
- processedBuffer = await sharp(processedBuffer)
|
|
|
|
|
- .linear(1.2, 30)
|
|
|
|
|
- .normalize()
|
|
|
|
|
- .grayscale()
|
|
|
|
|
- .toBuffer();
|
|
|
|
|
- } else {
|
|
|
|
|
- console.log(' - 使用标准化灰度处理');
|
|
|
|
|
- processedBuffer = await sharp(processedBuffer)
|
|
|
|
|
- .normalize()
|
|
|
|
|
- .grayscale()
|
|
|
|
|
- .toBuffer();
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ this.logger.debug(`图像统计 - 区域 ${regionIndex}: 亮度=${meanBrightness.toFixed(1)}, 对比度=${stdDev.toFixed(1)}`);
|
|
|
|
|
|
|
|
- // 3. 保持宽高比的resize
|
|
|
|
|
- const originalAspectRatio = metadata.width / metadata.height;
|
|
|
|
|
- const targetAspectRatio = targetWidth / targetHeight;
|
|
|
|
|
|
|
+ // 智能预处理
|
|
|
|
|
+ let processedBuffer = await this.applySmartPreprocessing(textRegionBuffer, meanBrightness, stdDev, regionIndex);
|
|
|
|
|
|
|
|
- let resizeWidth, resizeHeight;
|
|
|
|
|
|
|
+ // 保存预处理后的图像(灰度+对比度调整后)
|
|
|
|
|
+ const processedPath = path.join(this.preprocessedDir, `region-${regionIndex}-processed-${timestamp}.png`);
|
|
|
|
|
+ await fse.writeFile(processedPath, processedBuffer);
|
|
|
|
|
+ this.logger.debug(`保存预处理图像: ${processedPath}`);
|
|
|
|
|
|
|
|
- if (originalAspectRatio > targetAspectRatio) {
|
|
|
|
|
- // 宽度限制
|
|
|
|
|
- resizeWidth = targetWidth;
|
|
|
|
|
- resizeHeight = Math.round(targetWidth / originalAspectRatio);
|
|
|
|
|
- } else {
|
|
|
|
|
- // 高度限制
|
|
|
|
|
- resizeHeight = targetHeight;
|
|
|
|
|
- resizeWidth = Math.round(targetHeight * originalAspectRatio);
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ // 保持宽高比的resize,并在左右添加10像素空白
|
|
|
|
|
+ const resizedBuffer = await this.resizeWithAspectRatio(processedBuffer, targetWidth, targetHeight, regionIndex);
|
|
|
|
|
|
|
|
- // 确保尺寸有效
|
|
|
|
|
- resizeWidth = Math.max(1, Math.min(resizeWidth, targetWidth));
|
|
|
|
|
- resizeHeight = Math.max(1, Math.min(resizeHeight, targetHeight));
|
|
|
|
|
-
|
|
|
|
|
- processedBuffer = await sharp(processedBuffer)
|
|
|
|
|
- .resize(resizeWidth, resizeHeight, {
|
|
|
|
|
- fit: 'contain',
|
|
|
|
|
- background: { r: 255, g: 255, b: 255 }
|
|
|
|
|
- })
|
|
|
|
|
- .extend({
|
|
|
|
|
- top: 0,
|
|
|
|
|
- bottom: targetHeight - resizeHeight,
|
|
|
|
|
- left: 0,
|
|
|
|
|
- right: targetWidth - resizeWidth,
|
|
|
|
|
- background: { r: 255, g: 255, b: 255 }
|
|
|
|
|
- })
|
|
|
|
|
- .png()
|
|
|
|
|
- .toBuffer();
|
|
|
|
|
|
|
+ // 保存调整大小后的图像
|
|
|
|
|
+ const resizedPath = path.join(this.preprocessedDir, `region-${regionIndex}-resized-${timestamp}.png`);
|
|
|
|
|
+ await fse.writeFile(resizedPath, resizedBuffer);
|
|
|
|
|
+ this.logger.debug(`保存调整大小图像: ${resizedPath}`);
|
|
|
|
|
|
|
|
- const processedMetadata = await sharp(processedBuffer).metadata();
|
|
|
|
|
- console.log(` - 处理后尺寸: ${processedMetadata.width}x${processedMetadata.height}`);
|
|
|
|
|
|
|
+ // 使用最终尺寸创建张量
|
|
|
|
|
+ const inputData = await this.bufferToTensor(resizedBuffer, finalWidth, targetHeight);
|
|
|
|
|
+ this.logger.debug(`识别输入张量准备完成 - 区域 ${regionIndex}`);
|
|
|
|
|
|
|
|
- // 保存预处理后的图像用于调试
|
|
|
|
|
- const processedPath = path.join(this.debugDir, `processed-${Date.now()}.png`);
|
|
|
|
|
- await fse.writeFile(processedPath, processedBuffer);
|
|
|
|
|
|
|
+ // 创建张量时使用最终尺寸
|
|
|
|
|
+ return new Tensor('float32', inputData, [1, 3, targetHeight, finalWidth]);
|
|
|
|
|
|
|
|
- // 4. 转换为张量 - 关键修复:正确的归一化
|
|
|
|
|
- console.log(' - 转换为张量数据...');
|
|
|
|
|
- const imageData = await sharp(processedBuffer)
|
|
|
|
|
- .ensureAlpha()
|
|
|
|
|
- .raw()
|
|
|
|
|
- .toBuffer({ resolveWithObject: true });
|
|
|
|
|
-
|
|
|
|
|
- const inputData = new Float32Array(3 * targetHeight * targetWidth);
|
|
|
|
|
- const data = imageData.data;
|
|
|
|
|
- const channels = imageData.info.channels;
|
|
|
|
|
-
|
|
|
|
|
- // 使用正确的归一化方法
|
|
|
|
|
- for (let i = 0; i < data.length; i += channels) {
|
|
|
|
|
- const pixelIndex = Math.floor(i / channels);
|
|
|
|
|
- const y = Math.floor(pixelIndex / targetWidth);
|
|
|
|
|
- const x = pixelIndex % targetWidth;
|
|
|
|
|
-
|
|
|
|
|
- // 对每个位置,三个通道使用相同的灰度值
|
|
|
|
|
- const grayValue = data[i] / 255.0;
|
|
|
|
|
-
|
|
|
|
|
- for (let c = 0; c < 3; c++) {
|
|
|
|
|
- const inputIndex = c * targetHeight * targetWidth + y * targetWidth + x;
|
|
|
|
|
- if (inputIndex < inputData.length) {
|
|
|
|
|
- inputData[inputIndex] = grayValue;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ } catch (error) {
|
|
|
|
|
+ this.logger.error(`准备识别输入失败 - 区域 ${regionIndex}`, error);
|
|
|
|
|
+ return new Tensor('float32', new Float32Array(3 * targetHeight * finalWidth).fill(0.5), [1, 3, targetHeight, finalWidth]);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- console.log(` - 输入数据长度: ${inputData.length}`);
|
|
|
|
|
-
|
|
|
|
|
- // 数据验证
|
|
|
|
|
- let validCount = 0;
|
|
|
|
|
- let sumValue = 0;
|
|
|
|
|
- let minValue = Infinity;
|
|
|
|
|
- let maxValue = -Infinity;
|
|
|
|
|
-
|
|
|
|
|
- for (let i = 0; i < Math.min(100, inputData.length); i++) {
|
|
|
|
|
- const val = inputData[i];
|
|
|
|
|
- if (!isNaN(val) && isFinite(val)) {
|
|
|
|
|
- validCount++;
|
|
|
|
|
- sumValue += val;
|
|
|
|
|
- minValue = Math.min(minValue, val);
|
|
|
|
|
- maxValue = Math.max(maxValue, val);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ async applySmartPreprocessing(buffer, meanBrightness, stdDev, regionIndex = 0) {
|
|
|
|
|
+ let processedBuffer = buffer;
|
|
|
|
|
|
|
|
- console.log(` - 数据验证: 有效=${validCount}`);
|
|
|
|
|
- console.log(` - 数据范围: ${minValue.toFixed(4)} ~ ${maxValue.toFixed(4)}`);
|
|
|
|
|
- console.log(` - 数据均值: ${(sumValue / validCount).toFixed(4)}`);
|
|
|
|
|
|
|
+ if (meanBrightness > 200 && stdDev < 30) {
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用高亮度图像增强`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(1.5, -50)
|
|
|
|
|
+ .normalize()
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ } else if (meanBrightness < 80) {
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用低亮度图像增强`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(1.2, 30)
|
|
|
|
|
+ .normalize()
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ } else if (stdDev < 20) {
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用低对比度增强`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .linear(1.3, -20)
|
|
|
|
|
+ .normalize()
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ } else {
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 应用标准化灰度处理`);
|
|
|
|
|
+ processedBuffer = await sharp(buffer)
|
|
|
|
|
+ .normalize()
|
|
|
|
|
+ .grayscale()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- return new Tensor('float32', inputData, [1, 3, targetHeight, targetWidth]);
|
|
|
|
|
|
|
+ return processedBuffer;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- } catch (error) {
|
|
|
|
|
- console.error(` ❌ 准备输入失败: ${error.message}`);
|
|
|
|
|
- // 返回有效的默认张量
|
|
|
|
|
- return new Tensor('float32', new Float32Array(3 * 48 * 320).fill(0.5), [1, 3, 48, 320]);
|
|
|
|
|
|
|
+ async resizeWithAspectRatio(buffer, targetWidth, targetHeight, regionIndex = 0) {
|
|
|
|
|
+ const metadata = await sharp(buffer).metadata();
|
|
|
|
|
+ const originalAspectRatio = metadata.width / metadata.height;
|
|
|
|
|
+ const targetAspectRatio = targetWidth / targetHeight;
|
|
|
|
|
+
|
|
|
|
|
+ let resizeWidth, resizeHeight;
|
|
|
|
|
+
|
|
|
|
|
+ if (originalAspectRatio > targetAspectRatio) {
|
|
|
|
|
+ // 宽度限制,按宽度缩放
|
|
|
|
|
+ resizeWidth = targetWidth;
|
|
|
|
|
+ resizeHeight = Math.round(targetWidth / originalAspectRatio);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // 高度限制,按高度缩放
|
|
|
|
|
+ resizeHeight = targetHeight;
|
|
|
|
|
+ resizeWidth = Math.round(targetHeight * originalAspectRatio);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ resizeWidth = Math.max(1, Math.min(resizeWidth, targetWidth));
|
|
|
|
|
+ resizeHeight = Math.max(1, Math.min(resizeHeight, targetHeight));
|
|
|
|
|
+
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 调整尺寸 ${metadata.width}x${metadata.height} -> ${resizeWidth}x${resizeHeight}`);
|
|
|
|
|
+
|
|
|
|
|
+ // 计算居中的偏移量
|
|
|
|
|
+ const offsetX = Math.floor((targetWidth - resizeWidth) / 2);
|
|
|
|
|
+ const offsetY = Math.floor((targetHeight - resizeHeight) / 2);
|
|
|
|
|
+
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 居中偏移 X=${offsetX}, Y=${offsetY}`);
|
|
|
|
|
+
|
|
|
|
|
+ // 先调整大小并居中
|
|
|
|
|
+ let resizedBuffer = await sharp(buffer)
|
|
|
|
|
+ .resize(resizeWidth, resizeHeight, {
|
|
|
|
|
+ fit: 'contain',
|
|
|
|
|
+ background: { r: 255, g: 255, b: 255 }
|
|
|
|
|
+ })
|
|
|
|
|
+ .extend({
|
|
|
|
|
+ top: offsetY,
|
|
|
|
|
+ bottom: targetHeight - resizeHeight - offsetY,
|
|
|
|
|
+ left: offsetX,
|
|
|
|
|
+ right: targetWidth - resizeWidth - offsetX,
|
|
|
|
|
+ background: { r: 255, g: 255, b: 255 }
|
|
|
|
|
+ })
|
|
|
|
|
+ .png()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+
|
|
|
|
|
+ // 在左右各添加10像素空白
|
|
|
|
|
+ const finalWidth = targetWidth + 20; // 左右各加10像素
|
|
|
|
|
+ const finalHeight = targetHeight;
|
|
|
|
|
+
|
|
|
|
|
+ resizedBuffer = await sharp(resizedBuffer)
|
|
|
|
|
+ .extend({
|
|
|
|
|
+ top: 0,
|
|
|
|
|
+ bottom: 0,
|
|
|
|
|
+ left: 10,
|
|
|
|
|
+ right: 10,
|
|
|
|
|
+ background: { r: 255, g: 255, b: 255 }
|
|
|
|
|
+ })
|
|
|
|
|
+ .png()
|
|
|
|
|
+ .toBuffer();
|
|
|
|
|
+
|
|
|
|
|
+ this.logger.debug(`区域 ${regionIndex}: 最终尺寸 ${finalWidth}x${finalHeight} (左右各加10像素空白)`);
|
|
|
|
|
+
|
|
|
|
|
+ return resizedBuffer;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ async bufferToTensor(buffer, width, height) {
|
|
|
|
|
+ // 获取实际图像尺寸(因为现在宽度增加了20像素)
|
|
|
|
|
+ const metadata = await sharp(buffer).metadata();
|
|
|
|
|
+ const actualWidth = metadata.width;
|
|
|
|
|
+ const actualHeight = metadata.height;
|
|
|
|
|
+
|
|
|
|
|
+ const imageData = await sharp(buffer)
|
|
|
|
|
+ .ensureAlpha()
|
|
|
|
|
+ .raw()
|
|
|
|
|
+ .toBuffer({ resolveWithObject: true });
|
|
|
|
|
+
|
|
|
|
|
+ // 使用实际尺寸创建张量
|
|
|
|
|
+ const inputData = new Float32Array(3 * actualHeight * actualWidth);
|
|
|
|
|
+ const data = imageData.data;
|
|
|
|
|
+
|
|
|
|
|
+ for (let i = 0; i < data.length; i += 4) {
|
|
|
|
|
+ const pixelIndex = Math.floor(i / 4);
|
|
|
|
|
+ const y = Math.floor(pixelIndex / actualWidth);
|
|
|
|
|
+ const x = pixelIndex % actualWidth;
|
|
|
|
|
+
|
|
|
|
|
+ // 使用灰度值填充三个通道
|
|
|
|
|
+ const grayValue = data[i] / 255.0;
|
|
|
|
|
+
|
|
|
|
|
+ for (let c = 0; c < 3; c++) {
|
|
|
|
|
+ const inputIndex = c * actualHeight * actualWidth + y * actualWidth + x;
|
|
|
|
|
+ if (inputIndex < inputData.length) {
|
|
|
|
|
+ inputData[inputIndex] = grayValue;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ return inputData;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
postprocessRecognition(outputs) {
|
|
postprocessRecognition(outputs) {
|
|
|
- console.log(' 📝 后处理识别结果详情:');
|
|
|
|
|
|
|
+ this.logger.debug('开始识别后处理');
|
|
|
|
|
|
|
|
try {
|
|
try {
|
|
|
const outputNames = this.recSession.outputNames;
|
|
const outputNames = this.recSession.outputNames;
|
|
|
const recognitionOutput = outputs[outputNames[0]];
|
|
const recognitionOutput = outputs[outputNames[0]];
|
|
|
|
|
|
|
|
if (!recognitionOutput) {
|
|
if (!recognitionOutput) {
|
|
|
- console.log(' ❌ 识别输出为空');
|
|
|
|
|
|
|
+ this.logger.debug('识别输出为空');
|
|
|
return { text: '', confidence: 0 };
|
|
return { text: '', confidence: 0 };
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
const data = recognitionOutput.data;
|
|
const data = recognitionOutput.data;
|
|
|
const [batch, seqLen, vocabSize] = recognitionOutput.dims;
|
|
const [batch, seqLen, vocabSize] = recognitionOutput.dims;
|
|
|
|
|
|
|
|
- console.log(` - 序列长度: ${seqLen}, 词汇表大小: ${vocabSize}`);
|
|
|
|
|
- console.log(` - 输出数据总数: ${data.length}`);
|
|
|
|
|
- console.log(` - 字符集大小: ${this.characterSet.length}`);
|
|
|
|
|
|
|
+ this.logger.debug(`序列长度: ${seqLen}, 词汇表大小: ${vocabSize}, 字符集大小: ${this.characterSet.length}`);
|
|
|
|
|
|
|
|
if (this.characterSet.length === 0) {
|
|
if (this.characterSet.length === 0) {
|
|
|
- console.log(' ❌ 字符集为空');
|
|
|
|
|
|
|
+ this.logger.error('字符集为空');
|
|
|
return { text: '', confidence: 0 };
|
|
return { text: '', confidence: 0 };
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // 改进的CTC解码算法
|
|
|
|
|
- let text = '';
|
|
|
|
|
- let lastCharIndex = -1;
|
|
|
|
|
- let confidenceSum = 0;
|
|
|
|
|
- let charCount = 0;
|
|
|
|
|
-
|
|
|
|
|
- // 降低置信度阈值,提高召回率
|
|
|
|
|
- const confidenceThreshold = 0.05;
|
|
|
|
|
-
|
|
|
|
|
- console.log(' - 处理每个时间步:');
|
|
|
|
|
- for (let t = 0; t < seqLen; t++) {
|
|
|
|
|
- let maxProb = -1;
|
|
|
|
|
- let maxIndex = -1;
|
|
|
|
|
-
|
|
|
|
|
- // 找到当前时间步的最大概率字符
|
|
|
|
|
- for (let i = 0; i < vocabSize; i++) {
|
|
|
|
|
- const prob = data[t * vocabSize + i];
|
|
|
|
|
- if (prob > maxProb) {
|
|
|
|
|
- maxProb = prob;
|
|
|
|
|
- maxIndex = i;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ // 验证词汇表大小与字符集大小的匹配
|
|
|
|
|
+ if (vocabSize !== this.characterSet.length + 1) {
|
|
|
|
|
+ this.logger.warn(`词汇表大小(${vocabSize})与字符集大小(${this.characterSet.length})不匹配,可能影响识别效果`);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ const { text, confidence } = this.ctcDecode(data, seqLen, vocabSize);
|
|
|
|
|
+ this.logger.debug(`解码结果: "${text}", 置信度: ${confidence.toFixed(4)}`);
|
|
|
|
|
+
|
|
|
|
|
+ return { text, confidence };
|
|
|
|
|
+
|
|
|
|
|
+ } catch (error) {
|
|
|
|
|
+ this.logger.error('识别后处理失败', error);
|
|
|
|
|
+ return { text: '', confidence: 0 };
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ ctcDecode(data, seqLen, vocabSize) {
|
|
|
|
|
+ let text = '';
|
|
|
|
|
+ let lastCharIndex = -1;
|
|
|
|
|
+ let confidenceSum = 0;
|
|
|
|
|
+ let charCount = 0;
|
|
|
|
|
+
|
|
|
|
|
+ // 动态阈值调整
|
|
|
|
|
+ const baseThreshold = 0.03;
|
|
|
|
|
+ let confidenceThreshold = baseThreshold;
|
|
|
|
|
+
|
|
|
|
|
+ // 先分析整个序列的置信度分布
|
|
|
|
|
+ let maxSequenceProb = 0;
|
|
|
|
|
+ for (let t = 0; t < seqLen; t++) {
|
|
|
|
|
+ for (let i = 0; i < vocabSize; i++) {
|
|
|
|
|
+ maxSequenceProb = Math.max(maxSequenceProb, data[t * vocabSize + i]);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 如果整体置信度较低,降低阈值
|
|
|
|
|
+ if (maxSequenceProb < 0.5) {
|
|
|
|
|
+ confidenceThreshold = baseThreshold * 0.5;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ this.logger.debug(`使用解码阈值: ${confidenceThreshold.toFixed(4)}`);
|
|
|
|
|
+
|
|
|
|
|
+ for (let t = 0; t < seqLen; t++) {
|
|
|
|
|
+ let maxProb = -1;
|
|
|
|
|
+ let maxIndex = -1;
|
|
|
|
|
+
|
|
|
|
|
+ // 找到当前时间步的最大概率字符
|
|
|
|
|
+ for (let i = 0; i < vocabSize; i++) {
|
|
|
|
|
+ const prob = data[t * vocabSize + i];
|
|
|
|
|
+ if (prob > maxProb) {
|
|
|
|
|
+ maxProb = prob;
|
|
|
|
|
+ maxIndex = i;
|
|
|
}
|
|
}
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- // 改进的解码逻辑
|
|
|
|
|
- if (maxIndex > 0 && maxProb > confidenceThreshold) {
|
|
|
|
|
- const char = this.characterSet[maxIndex - 1] || '';
|
|
|
|
|
-
|
|
|
|
|
- // 放宽重复字符限制
|
|
|
|
|
- if (maxIndex !== lastCharIndex || maxProb > 0.8) {
|
|
|
|
|
- if (char && char.trim() !== '') {
|
|
|
|
|
- text += char;
|
|
|
|
|
- confidenceSum += maxProb;
|
|
|
|
|
- charCount++;
|
|
|
|
|
- console.log(` [位置 ${t}] 字符: "${char}", 置信度: ${maxProb.toFixed(4)}`);
|
|
|
|
|
- }
|
|
|
|
|
- lastCharIndex = maxIndex;
|
|
|
|
|
|
|
+ // 改进的CTC解码逻辑
|
|
|
|
|
+ if (maxIndex > 0 && maxProb > confidenceThreshold) {
|
|
|
|
|
+ const charIndex = maxIndex - 1;
|
|
|
|
|
+ if (charIndex < this.characterSet.length) {
|
|
|
|
|
+ const char = this.characterSet[charIndex];
|
|
|
|
|
+
|
|
|
|
|
+ // 更智能的重复字符处理
|
|
|
|
|
+ const shouldAddChar = maxIndex !== lastCharIndex ||
|
|
|
|
|
+ maxProb > 0.8 ||
|
|
|
|
|
+ (maxIndex === lastCharIndex && charCount > 0 && text[text.length - 1] !== char);
|
|
|
|
|
+
|
|
|
|
|
+ if (shouldAddChar && char && char.trim() !== '') {
|
|
|
|
|
+ text += char;
|
|
|
|
|
+ confidenceSum += maxProb;
|
|
|
|
|
+ charCount++;
|
|
|
}
|
|
}
|
|
|
- } else if (maxIndex === 0) {
|
|
|
|
|
- // 空白符,重置lastCharIndex
|
|
|
|
|
- lastCharIndex = -1;
|
|
|
|
|
|
|
+ lastCharIndex = maxIndex;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ this.logger.warn(`字符索引${charIndex}超出字符集范围(0-${this.characterSet.length-1})`);
|
|
|
}
|
|
}
|
|
|
|
|
+ } else if (maxIndex === 0) {
|
|
|
|
|
+ lastCharIndex = -1;
|
|
|
}
|
|
}
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- const avgConfidence = charCount > 0 ? confidenceSum / charCount : 0;
|
|
|
|
|
|
|
+ const avgConfidence = charCount > 0 ? confidenceSum / charCount : 0;
|
|
|
|
|
|
|
|
- console.log(` - 识别结果: "${text}"`);
|
|
|
|
|
- console.log(` - 字符数: ${charCount}, 平均置信度: ${avgConfidence.toFixed(4)}`);
|
|
|
|
|
|
|
+ // 基本的文本清理(不包含错误模式修复)
|
|
|
|
|
+ const cleanedText = this.basicTextCleaning(text);
|
|
|
|
|
|
|
|
- return {
|
|
|
|
|
- text: text,
|
|
|
|
|
- confidence: avgConfidence
|
|
|
|
|
- };
|
|
|
|
|
|
|
+ return {
|
|
|
|
|
+ text: cleanedText,
|
|
|
|
|
+ confidence: avgConfidence
|
|
|
|
|
+ };
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- } catch (error) {
|
|
|
|
|
- console.error(` ❌ 后处理失败: ${error.message}`);
|
|
|
|
|
- return { text: '', confidence: 0 };
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ basicTextCleaning(text) {
|
|
|
|
|
+ if (!text) return '';
|
|
|
|
|
+
|
|
|
|
|
+ let cleaned = text;
|
|
|
|
|
+
|
|
|
|
|
+ // 1. 移除过多的重复字符(保留合理的重复)
|
|
|
|
|
+ cleaned = cleaned.replace(/([^0-9])\1{2,}/g, '$1$1');
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 修复标点符号
|
|
|
|
|
+ cleaned = cleaned.replace(/∶/g, ':')
|
|
|
|
|
+ .replace(/《/g, '(')
|
|
|
|
|
+ .replace(/》/g, ')');
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 修复数字和百分号
|
|
|
|
|
+ cleaned = cleaned.replace(/(\d+)%%/g, '$1%');
|
|
|
|
|
+
|
|
|
|
|
+ return cleaned.trim();
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|