201 行
7.6 KiB
JavaScript
201 行
7.6 KiB
JavaScript
|
|
// server/utils/onnxOcrManager.js
|
||
|
|
import { InferenceSession } from 'onnxruntime-node';
|
||
|
|
import fse from 'fs-extra';
|
||
|
|
import * as path from 'path';
|
||
|
|
import { fileURLToPath } from 'url';
|
||
|
|
|
||
|
|
import DetectionProcessor from './detectionProcessor.js';
|
||
|
|
import RecognitionProcessor from './recognitionProcessor.js';
|
||
|
|
import ImagePreprocessor from './imagePreprocessor.js';
|
||
|
|
import TextPostProcessor from './textPostProcessor.js';
|
||
|
|
|
||
|
|
// Directory of this module file, derived from import.meta.url (ES modules have no
// built-in __dirname). NOTE(review): not referenced in the visible part of this file —
// model paths below are resolved from process.cwd() instead; confirm that is intended.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||
|
|
|
||
|
|
/**
 * Coordinates an offline OCR pipeline on top of ONNX Runtime.
 *
 * The manager owns three inference sessions (detection, recognition, classification),
 * hands them to the detection / recognition processors, and exposes a single
 * `recognizeImage()` entry point that runs preprocessing, detection, recognition and
 * text post-processing in order.
 */
class OnnxOcrManager {
  /**
   * Sets up model paths, helper processors and the default configuration.
   * No model is loaded here; loading happens in `initialize()`.
   */
  constructor() {
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;
    this.isInitialized = false;

    // Promise of an initialization that is currently running, so that concurrent
    // callers share one model load instead of each creating their own sessions.
    this.initPromise = null;
    // Configuration that was actually handed to the processors by the last
    // successful initialize(); null while not initialized.
    this.activeConfig = null;

    // Model files are resolved relative to the process working directory.
    this.modelDir = path.join(process.cwd(), 'models', 'ocr');
    this.detModelPath = path.join(this.modelDir, 'Det', '中文_OCRv3.onnx');
    this.recModelPath = path.join(this.modelDir, 'Rec', '中文简体_OCRv3.onnx');
    this.clsModelPath = path.join(this.modelDir, 'Cls', '原始分类器模型.onnx');
    this.keysPath = path.join(this.modelDir, 'Keys', '中文简体_OCRv3.txt');

    this.detectionProcessor = new DetectionProcessor();
    this.recognitionProcessor = new RecognitionProcessor();
    this.imagePreprocessor = new ImagePreprocessor();
    this.textPostProcessor = new TextPostProcessor();

    // Updated defaults, tuned for better recognition results.
    this.defaultConfig = {
      language: 'ch',
      detLimitSideLen: 960,
      detThresh: 0.05, // lowered detection threshold
      detBoxThresh: 0.1, // lowered box threshold
      detUnclipRatio: 1.8, // adjusted unclip ratio
      maxTextLength: 50, // increased maximum text length
      recImageHeight: 48,
      clsThresh: 0.8, // lowered classification threshold
      minTextHeight: 2, // lowered minimum text height
      minTextWidth: 2, // lowered minimum text width
      clusterDistance: 8, // adjusted clustering distance
      minClusterPoints: 2 // lowered minimum number of cluster points
    };
  }

  /**
   * Loads the models and wires the processors. Safe to call repeatedly and
   * concurrently: once initialized it returns immediately, and while a load is in
   * flight every caller awaits that same load.
   *
   * @param {object} [config={}] Overrides merged on top of `defaultConfig`. Only the
   *   config of the call that actually starts the load is applied.
   * @returns {Promise<void>}
   * @throws Rethrows whatever validation, character-set loading, session creation or
   *   processor wiring threw.
   */
  async initialize(config = {}) {
    if (this.isInitialized) {
      console.log('🔁 OCR管理器已初始化');
      return;
    }

    if (!this.initPromise) {
      // Clear the marker when the load settles so a failed attempt can be retried.
      this.initPromise = this.loadSessionsAndProcessors(config).finally(() => {
        this.initPromise = null;
      });
    }

    await this.initPromise;
  }

  /**
   * Internal worker behind `initialize()`: validates files, loads the character set,
   * creates the three sessions and initializes the processors. If anything fails,
   * every session that was already created is released before the error is rethrown.
   *
   * @param {object} config Overrides merged on top of `defaultConfig`.
   * @returns {Promise<void>}
   */
  async loadSessionsAndProcessors(config) {
    const createdSessions = [];

    try {
      console.log('🚀 开始初始化OCR管理器...');
      await this.validateModelFiles();
      await this.recognitionProcessor.loadCharacterSet(this.keysPath);

      // allSettled (rather than all) so that sessions which did load can still be
      // released when a sibling load fails.
      const outcomes = await Promise.allSettled(
        [this.detModelPath, this.recModelPath, this.clsModelPath].map(async (modelPath) =>
          InferenceSession.create(modelPath, { executionProviders: ['cpu'] })
        )
      );

      for (const outcome of outcomes) {
        if (outcome.status === 'fulfilled') {
          createdSessions.push(outcome.value);
        }
      }

      const failure = outcomes.find((outcome) => outcome.status === 'rejected');
      if (failure) {
        throw failure.reason;
      }

      const [detSession, recSession, clsSession] = createdSessions;
      const mergedConfig = { ...this.defaultConfig, ...config };

      this.detectionProcessor.initialize(detSession, mergedConfig);
      this.recognitionProcessor.initialize(recSession, clsSession, mergedConfig);

      // Publish state only after everything succeeded.
      this.detSession = detSession;
      this.recSession = recSession;
      this.clsSession = clsSession;
      this.activeConfig = mergedConfig;
      this.isInitialized = true;
      console.log('✅ OCR管理器初始化完成');
    } catch (error) {
      console.error('❌ OCR管理器初始化失败:', error);
      // Best-effort cleanup; the original failure is what the caller needs to see.
      await Promise.allSettled(createdSessions.map(async (session) => session.release()));
      throw error;
    }
  }

  /**
   * Verifies that the three model files and the character-set file exist.
   *
   * @returns {Promise<void>}
   * @throws {Error} For the first missing file, naming its path and role.
   */
  async validateModelFiles() {
    const requiredFiles = [
      { path: this.detModelPath, name: '检测模型' },
      { path: this.recModelPath, name: '识别模型' },
      { path: this.clsModelPath, name: '分类模型' },
      { path: this.keysPath, name: '字符集文件' }
    ];

    for (const { path: filePath, name } of requiredFiles) {
      const exists = await fse.pathExists(filePath);
      if (!exists) {
        // The role is appended so the original message prefix stays unchanged.
        throw new Error(`模型文件不存在: ${filePath} (${name})`);
      }
    }

    console.log('✅ 所有模型文件验证通过');
  }

  /**
   * Runs the full OCR pipeline on one image file.
   *
   * The path is validated before any model loading, so a bad argument fails fast
   * without triggering initialization.
   *
   * @param {string} imagePath Path of the image to recognize.
   * @param {object} [config={}] Passed to `initialize()` when the manager is not yet
   *   initialized, and to the image preprocessor.
   * @returns {Promise<object>} Result with `textBlocks`, `confidence`,
   *   `processingTime` (ms, covering preprocessing through recognition), `isOffline`,
   *   `imagePath`, `totalPages`, `rawText`, `imageInfo` and `recognitionCount`.
   * @throws {Error} If the path is invalid or missing, if initialization fails, or —
   *   wrapped as `OCR识别失败: …` with the original error as `cause` — if any
   *   pipeline stage fails.
   */
  async recognizeImage(imagePath, config = {}) {
    if (!imagePath || typeof imagePath !== 'string') {
      throw new Error(`无效的图片路径: ${imagePath}`);
    }

    if (!(await fse.pathExists(imagePath))) {
      throw new Error(`图片文件不存在: ${imagePath}`);
    }

    if (!this.isInitialized) {
      await this.initialize(config);
    }

    try {
      console.log(`\n🎯 开始OCR识别: ${path.basename(imagePath)}`);
      const startTime = Date.now();

      const preprocessResult = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
      const { processedImage } = preprocessResult;

      const textBoxes = await this.detectionProcessor.detectText(processedImage);
      const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(processedImage, textBoxes);

      // Timing deliberately stops here, before post-processing and image-info lookup.
      const processingTime = Date.now() - startTime;

      const textBlocks = this.textPostProcessor.buildTextBlocks(recognitionResults);
      const imageInfo = await this.imagePreprocessor.getImageInfo(imagePath);

      const rawText = textBlocks.map((block) => block.content).join('\n');
      const overallConfidence = this.textPostProcessor.calculateOverallConfidence(recognitionResults);

      const result = {
        textBlocks,
        confidence: overallConfidence,
        processingTime,
        isOffline: true,
        imagePath,
        totalPages: 1,
        rawText,
        imageInfo,
        recognitionCount: recognitionResults.length
      };

      console.log(`\n📊 OCR识别统计:`);
      console.log(` - 处理时间: ${processingTime}ms`);
      console.log(` - 检测区域: ${textBoxes.length} 个`);
      console.log(` - 成功识别: ${recognitionResults.length} 个`);
      console.log(` - 总体置信度: ${overallConfidence.toFixed(4)}`);
      console.log(` - 最终文本长度: ${rawText.length} 字符`);

      return result;
    } catch (error) {
      console.error(`❌ OCR识别失败: ${error.message}`);
      // Keep the original error (and its stack) reachable through `cause`.
      throw new Error(`OCR识别失败: ${error.message}`, { cause: error });
    }
  }

  /**
   * Returns a snapshot describing the manager.
   *
   * @returns {object} Initialization flag, static engine/version/backend labels,
   *   model paths relative to the working directory, the character-set size reported
   *   by the recognition processor, and the thresholds currently in effect (the
   *   merged config from the last successful `initialize()`, or the defaults when
   *   not initialized).
   */
  getStatus() {
    const effectiveConfig = this.activeConfig ?? this.defaultConfig;
    const workingDir = process.cwd();

    return {
      isInitialized: this.isInitialized,
      isOffline: true,
      engine: 'PP-OCRv3 (ONNX Runtime)',
      version: '1.0.0',
      models: {
        detection: path.relative(workingDir, this.detModelPath),
        recognition: path.relative(workingDir, this.recModelPath),
        classification: path.relative(workingDir, this.clsModelPath),
        characterSet: this.recognitionProcessor.getCharacterSetSize()
      },
      config: {
        detThresh: effectiveConfig.detThresh,
        detBoxThresh: effectiveConfig.detBoxThresh,
        clsThresh: effectiveConfig.clsThresh,
        preprocessing: 'enabled with padding'
      },
      backend: 'CPU'
    };
  }

  /**
   * Releases all loaded sessions and marks the manager as uninitialized.
   * Resolves only after every release has settled.
   *
   * @returns {Promise<void>}
   * @throws Rethrows the first release failure, after all sessions were attempted.
   */
  async terminate() {
    const liveSessions = [this.detSession, this.recSession, this.clsSession].filter(Boolean);

    // Drop references first so the manager is in a consistent "not initialized"
    // state even if a release fails.
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;
    this.activeConfig = null;
    this.isInitialized = false;

    // The async wrapper turns a synchronous throw from release() into a rejection,
    // so one bad session cannot prevent the others from being released.
    const outcomes = await Promise.allSettled(liveSessions.map(async (session) => session.release()));
    const failure = outcomes.find((outcome) => outcome.status === 'rejected');
    if (failure) {
      throw failure.reason;
    }

    console.log('🛑 OCR管理器已终止');
  }
}
|
||
|
|
|
||
|
|
// Module-level instance, constructed when this module is first evaluated.
// Construction only sets up paths, processors and defaults; models are loaded by initialize().
const onnxOcrManager = new OnnxOcrManager();
|
||
|
|
|
||
|
|
// The instance (not the class) is the default export, so every importer of this module
// receives the same object.
export default onnxOcrManager;
|