// server/utils/onnxOcrManager.js
import { InferenceSession } from 'onnxruntime-node';
import fse from 'fs-extra';
import * as path from 'path';
import { fileURLToPath } from 'url';
import DetectionProcessor from './detectionProcessor.js';
import RecognitionProcessor from './recognitionProcessor.js';
import ImagePreprocessor from './imagePreprocessor.js';
import TextPostProcessor from './textPostProcessor.js';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Owns the three ONNX inference sessions (detection, recognition,
 * classification) and wires them into the detection / recognition processors.
 *
 * Model and key files are resolved relative to `process.cwd()` under
 * `models/ocr`. A single shared instance is exported at the bottom of the file.
 */
class OnnxOcrManager {
  constructor() {
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;
    this.isInitialized = false;
    // In-flight initialization promise, shared by concurrent initialize() callers.
    this.initPromise = null;

    this.modelDir = path.join(process.cwd(), 'models', 'ocr');
    this.detModelPath = path.join(this.modelDir, 'Det', '中文_OCRv3.onnx');
    this.recModelPath = path.join(this.modelDir, 'Rec', '中文简体_OCRv3.onnx');
    this.clsModelPath = path.join(this.modelDir, 'Cls', '原始分类器模型.onnx');
    this.keysPath = path.join(this.modelDir, 'Keys', '中文简体_OCRv3.txt');

    this.detectionProcessor = new DetectionProcessor();
    this.recognitionProcessor = new RecognitionProcessor();
    this.imagePreprocessor = new ImagePreprocessor();
    this.textPostProcessor = new TextPostProcessor();

    // Updated default configuration, tuned for better recognition results.
    this.defaultConfig = {
      language: 'ch',
      detLimitSideLen: 960,
      detThresh: 0.05, // lowered detection threshold
      detBoxThresh: 0.1, // lowered box threshold
      detUnclipRatio: 1.8, // adjusted unclip ratio
      maxTextLength: 50, // increased maximum text length
      recImageHeight: 48,
      clsThresh: 0.8, // lowered classification threshold
      minTextHeight: 2, // lowered minimum text height
      minTextWidth: 2, // lowered minimum text width
      clusterDistance: 8, // adjusted clustering distance
      minClusterPoints: 2 // lowered minimum cluster point count
    };
  }

  /**
   * Loads the character set and the three ONNX sessions, then initializes the
   * detection and recognition processors with `defaultConfig` merged with
   * `config` (keys in `config` win).
   *
   * Returns immediately when already initialized. Concurrent calls made while
   * an initialization is in flight share that one attempt instead of each
   * creating their own sessions; the `config` of the later callers is ignored,
   * just as it is once the manager is initialized.
   *
   * @param {object} [config={}] - Overrides applied on top of `defaultConfig`.
   * @returns {Promise<void>}
   * @throws Rethrows whatever caused the initialization to fail.
   */
  async initialize(config = {}) {
    if (this.isInitialized) {
      console.log('🔁 OCR管理器已初始化');
      return;
    }

    if (this.initPromise === null) {
      // Clear the slot once settled so a failed attempt can be retried.
      this.initPromise = this.runInitialization(config).finally(() => {
        this.initPromise = null;
      });
    }

    await this.initPromise;
  }

  /**
   * Performs one initialization attempt. Internal helper for initialize().
   * On any failure, sessions that were already created are released before
   * the original error is rethrown.
   *
   * @param {object} config - Overrides applied on top of `defaultConfig`.
   * @returns {Promise<void>}
   */
  async runInitialization(config) {
    try {
      console.log('🚀 开始初始化OCR管理器...');
      await this.validateModelFiles();
      await this.recognitionProcessor.loadCharacterSet(this.keysPath);

      // allSettled (not all): when one model fails to load, the others may
      // still succeed and must be tracked so they can be released.
      const outcomes = await Promise.allSettled([
        InferenceSession.create(this.detModelPath, { executionProviders: ['cpu'] }),
        InferenceSession.create(this.recModelPath, { executionProviders: ['cpu'] }),
        InferenceSession.create(this.clsModelPath, { executionProviders: ['cpu'] })
      ]);
      const [detOutcome, recOutcome, clsOutcome] = outcomes;
      this.detSession = detOutcome.status === 'fulfilled' ? detOutcome.value : null;
      this.recSession = recOutcome.status === 'fulfilled' ? recOutcome.value : null;
      this.clsSession = clsOutcome.status === 'fulfilled' ? clsOutcome.value : null;

      const failedOutcome = outcomes.find((outcome) => outcome.status === 'rejected');
      if (failedOutcome) {
        throw failedOutcome.reason;
      }

      const mergedConfig = { ...this.defaultConfig, ...config };
      this.detectionProcessor.initialize(this.detSession, mergedConfig);
      this.recognitionProcessor.initialize(this.recSession, this.clsSession, mergedConfig);

      this.isInitialized = true;
      console.log('✅ OCR管理器初始化完成');
    } catch (error) {
      console.error('❌ OCR管理器初始化失败:', error);
      // Do not leave half-created sessions behind; a retry would overwrite
      // the references and leak them.
      await this.releaseSessions();
      throw error;
    }
  }

  /**
   * Checks that the three model files and the character-set file exist.
   *
   * @returns {Promise<void>}
   * @throws {Error} For the first path that does not exist.
   */
  async validateModelFiles() {
    const requiredFiles = [
      { path: this.detModelPath, name: '检测模型' },
      { path: this.recModelPath, name: '识别模型' },
      { path: this.clsModelPath, name: '分类模型' },
      { path: this.keysPath, name: '字符集文件' }
    ];

    for (const { path: filePath } of requiredFiles) {
      const exists = await fse.pathExists(filePath);
      if (!exists) {
        throw new Error(`模型文件不存在: ${filePath}`);
      }
    }

    console.log('✅ 所有模型文件验证通过');
  }

  /**
   * Runs the OCR pipeline on one image file: preprocess, detect text boxes,
   * recognize text, then build text blocks and an overall confidence.
   * Initializes the manager first (passing `config` through) when needed.
   *
   * @param {string} imagePath - Path to an existing image file.
   * @param {object} [config={}] - Passed to the image preprocessor and, on
   *   first use, to initialize().
   * @returns {Promise<object>} Result with `textBlocks`, `confidence`,
   *   `processingTime` (ms, measured from preprocessing through recognition),
   *   `isOffline`, `imagePath`, `totalPages`, `rawText`, `imageInfo` and
   *   `recognitionCount`.
   * @throws {Error} When `imagePath` is not a non-empty string, the file does
   *   not exist, or any pipeline stage fails (the original error is attached
   *   as `cause`).
   */
  async recognizeImage(imagePath, config = {}) {
    if (!this.isInitialized) {
      await this.initialize(config);
    }

    if (!imagePath || typeof imagePath !== 'string') {
      throw new Error(`无效的图片路径: ${imagePath}`);
    }

    // Async existence check so the event loop is not blocked on disk access.
    const imageExists = await fse.pathExists(imagePath);
    if (!imageExists) {
      throw new Error(`图片文件不存在: ${imagePath}`);
    }

    try {
      console.log(`\n🎯 开始OCR识别: ${path.basename(imagePath)}`);
      const startTime = Date.now();

      const preprocessResult = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
      const { processedImage } = preprocessResult;

      const textBoxes = await this.detectionProcessor.detectText(processedImage);
      const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(
        processedImage,
        textBoxes
      );
      const processingTime = Date.now() - startTime;

      const textBlocks = this.textPostProcessor.buildTextBlocks(recognitionResults);
      const imageInfo = await this.imagePreprocessor.getImageInfo(imagePath);
      const rawText = textBlocks.map((block) => block.content).join('\n');
      const overallConfidence = this.textPostProcessor.calculateOverallConfidence(recognitionResults);

      const result = {
        textBlocks,
        confidence: overallConfidence,
        processingTime,
        isOffline: true,
        imagePath,
        totalPages: 1,
        rawText,
        imageInfo,
        recognitionCount: recognitionResults.length
      };

      console.log(`\n📊 OCR识别统计:`);
      console.log(` - 处理时间: ${processingTime}ms`);
      console.log(` - 检测区域: ${textBoxes.length} 个`);
      console.log(` - 成功识别: ${recognitionResults.length} 个`);
      console.log(` - 总体置信度: ${overallConfidence.toFixed(4)}`);
      console.log(` - 最终文本长度: ${rawText.length} 字符`);

      return result;
    } catch (error) {
      console.error(`❌ OCR识别失败: ${error.message}`);
      // Keep the original error reachable for debugging via `cause`.
      throw new Error(`OCR识别失败: ${error.message}`, { cause: error });
    }
  }

  /**
   * Returns a snapshot describing the engine, model locations (relative to
   * `process.cwd()`) and thresholds. The thresholds are read from
   * `defaultConfig`, so overrides passed to initialize() are not reflected.
   *
   * @returns {object} Status summary.
   */
  getStatus() {
    return {
      isInitialized: this.isInitialized,
      isOffline: true,
      engine: 'PP-OCRv3 (ONNX Runtime)',
      version: '1.0.0',
      models: {
        detection: path.relative(process.cwd(), this.detModelPath),
        recognition: path.relative(process.cwd(), this.recModelPath),
        classification: path.relative(process.cwd(), this.clsModelPath),
        characterSet: this.recognitionProcessor.getCharacterSetSize()
      },
      config: {
        detThresh: this.defaultConfig.detThresh,
        detBoxThresh: this.defaultConfig.detBoxThresh,
        clsThresh: this.defaultConfig.clsThresh,
        preprocessing: 'enabled with padding'
      },
      backend: 'CPU'
    };
  }

  /**
   * Releases every session currently held and clears the references.
   * Internal helper shared by terminate() and the failed-initialization path.
   * All sessions are attempted even when one release fails; failures are
   * logged and returned rather than thrown.
   *
   * @returns {Promise<Array>} The release errors, empty when all succeeded.
   */
  async releaseSessions() {
    const heldSessions = [this.detSession, this.recSession, this.clsSession].filter(Boolean);
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;

    // The async wrapper turns a synchronous throw from release() into a
    // rejection so it cannot abort the remaining releases.
    const outcomes = await Promise.allSettled(
      heldSessions.map(async (session) => session.release())
    );

    const releaseErrors = outcomes
      .filter((outcome) => outcome.status === 'rejected')
      .map((outcome) => outcome.reason);
    for (const releaseError of releaseErrors) {
      console.error('❌ 释放OCR会话失败:', releaseError);
    }
    return releaseErrors;
  }

  /**
   * Releases all sessions and marks the manager as uninitialized so a later
   * initialize() call starts from scratch.
   *
   * @returns {Promise<void>}
   * @throws Rethrows the first release error, after every session has been
   *   attempted and the manager state has been reset.
   */
  async terminate() {
    // Flip the flag first so no new recognition starts on sessions that are
    // about to be released.
    this.isInitialized = false;
    const releaseErrors = await this.releaseSessions();
    console.log('🛑 OCR管理器已终止');

    if (releaseErrors.length > 0) {
      throw releaseErrors[0];
    }
  }
}

const onnxOcrManager = new OnnxOcrManager();

export default onnxOcrManager;