| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201 |
- // server/utils/onnxOcrManager.js
- import { InferenceSession } from 'onnxruntime-node';
- import fse from 'fs-extra';
- import * as path from 'path';
- import { fileURLToPath } from 'url';
- import DetectionProcessor from './detectionProcessor.js';
- import RecognitionProcessor from './recognitionProcessor.js';
- import ImagePreprocessor from './imagePreprocessor.js';
- import TextPostProcessor from './textPostProcessor.js';
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Coordinates an offline OCR pipeline on top of ONNX Runtime.
 *
 * The manager owns three inference sessions (detection, recognition and
 * classification), wires them into the detection/recognition processors, and
 * exposes a single `recognizeImage` entry point plus status/teardown helpers.
 */
class OnnxOcrManager {
  /**
   * Sets up model paths (relative to the current working directory), the
   * helper processors and the default configuration. No model is loaded here;
   * that happens in `initialize()`.
   */
  constructor() {
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;
    this.isInitialized = false;

    // In-flight initialization promise, shared by concurrent initialize() callers.
    this.initPromise = null;
    // Defaults merged with the overrides passed to initialize(); null until initialized.
    this.activeConfig = null;

    this.modelDir = path.join(process.cwd(), 'models', 'ocr');
    this.detModelPath = path.join(this.modelDir, 'Det', '中文_OCRv3.onnx');
    this.recModelPath = path.join(this.modelDir, 'Rec', '中文简体_OCRv3.onnx');
    this.clsModelPath = path.join(this.modelDir, 'Cls', '原始分类器模型.onnx');
    this.keysPath = path.join(this.modelDir, 'Keys', '中文简体_OCRv3.txt');

    this.detectionProcessor = new DetectionProcessor();
    this.recognitionProcessor = new RecognitionProcessor();
    this.imagePreprocessor = new ImagePreprocessor();
    this.textPostProcessor = new TextPostProcessor();

    // Default configuration, tuned to improve recognition results.
    this.defaultConfig = {
      language: 'ch',
      detLimitSideLen: 960,
      detThresh: 0.05, // lowered detection threshold
      detBoxThresh: 0.1, // lowered box threshold
      detUnclipRatio: 1.8, // adjusted unclip ratio
      maxTextLength: 50, // raised maximum text length
      recImageHeight: 48,
      clsThresh: 0.8, // lowered classification threshold
      minTextHeight: 2, // lowered minimum text height
      minTextWidth: 2, // lowered minimum text width
      clusterDistance: 8, // adjusted clustering distance
      minClusterPoints: 2 // lowered minimum cluster point count
    };
  }

  /**
   * Loads the character set and the three ONNX models, then initializes the
   * detection and recognition processors with the merged configuration.
   *
   * Calling this on an initialized manager is a no-op. Concurrent calls share
   * one in-flight initialization, so the models are only loaded once; the
   * configuration of the call that started the initialization wins.
   *
   * @param {object} [config={}] - Overrides merged on top of `defaultConfig`.
   * @returns {Promise<void>}
   * @throws Rethrows whatever validation, model loading or processor setup throws.
   */
  async initialize(config = {}) {
    if (this.isInitialized) {
      console.log('🔁 OCR管理器已初始化');
      return;
    }

    if (!this.initPromise) {
      // Clear the marker once settled so a failed attempt can be retried.
      this.initPromise = this.runInitialization(config).finally(() => {
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }

  /**
   * Performs the actual initialization work for `initialize()`.
   *
   * @param {object} config - Overrides merged on top of `defaultConfig`.
   * @returns {Promise<void>}
   */
  async runInitialization(config) {
    try {
      console.log('🚀 开始初始化OCR管理器...');
      await this.validateModelFiles();
      await this.recognitionProcessor.loadCharacterSet(this.keysPath);

      const [detSession, recSession, clsSession] = await this.createSessions([
        this.detModelPath,
        this.recModelPath,
        this.clsModelPath
      ]);
      this.detSession = detSession;
      this.recSession = recSession;
      this.clsSession = clsSession;

      const mergedConfig = { ...this.defaultConfig, ...config };
      this.detectionProcessor.initialize(this.detSession, mergedConfig);
      this.recognitionProcessor.initialize(this.recSession, this.clsSession, mergedConfig);

      this.activeConfig = mergedConfig;
      this.isInitialized = true;
      console.log('✅ OCR管理器初始化完成');
    } catch (error) {
      // Do not keep sessions around from a half-finished initialization.
      await this.releaseSessions();
      console.error('❌ OCR管理器初始化失败:', error);
      throw error;
    }
  }

  /**
   * Creates one CPU inference session per model path, in parallel.
   *
   * If any model fails to load, the sessions that did load are released
   * before the first failure is rethrown, so nothing leaks.
   *
   * @param {string[]} modelPaths - Paths of the ONNX model files.
   * @returns {Promise<object[]>} Sessions in the same order as `modelPaths`.
   */
  async createSessions(modelPaths) {
    const outcomes = await Promise.allSettled(
      modelPaths.map((modelPath) =>
        InferenceSession.create(modelPath, { executionProviders: ['cpu'] })
      )
    );

    const failure = outcomes.find((outcome) => outcome.status === 'rejected');
    if (failure) {
      await Promise.allSettled(
        outcomes
          .filter((outcome) => outcome.status === 'fulfilled')
          // async wrapper: a synchronous throw from release() must not mask the load error
          .map(async (outcome) => outcome.value.release())
      );
      throw failure.reason;
    }

    return outcomes.map((outcome) => outcome.value);
  }

  /**
   * Verifies that every model file and the character-set file exist.
   *
   * @returns {Promise<void>}
   * @throws {Error} For the first required file that is missing.
   */
  async validateModelFiles() {
    const requiredFiles = [
      { path: this.detModelPath, name: '检测模型' },
      { path: this.recModelPath, name: '识别模型' },
      { path: this.clsModelPath, name: '分类模型' },
      { path: this.keysPath, name: '字符集文件' }
    ];

    for (const { path: filePath } of requiredFiles) {
      const exists = await fse.pathExists(filePath);
      if (!exists) {
        throw new Error(`模型文件不存在: ${filePath}`);
      }
    }
    console.log('✅ 所有模型文件验证通过');
  }

  /**
   * Runs the OCR pipeline on one image: preprocess, detect text boxes,
   * recognize text, then build text blocks and summary statistics.
   *
   * The manager is initialized lazily on first use. `config` is passed to
   * that lazy initialization and to the image preprocessor.
   *
   * @param {string} imagePath - Path of the image file to recognize.
   * @param {object} [config={}] - Optional configuration overrides.
   * @returns {Promise<object>} Result with `textBlocks`, `confidence`,
   *   `processingTime` (ms), `isOffline`, `imagePath`, `totalPages`, `rawText`,
   *   `imageInfo` and `recognitionCount`.
   * @throws {Error} If the path is invalid, the file is missing, or any
   *   pipeline stage fails (the original error is attached as `cause`).
   */
  async recognizeImage(imagePath, config = {}) {
    if (!this.isInitialized) {
      await this.initialize(config);
    }

    if (!imagePath || typeof imagePath !== 'string') {
      throw new Error(`无效的图片路径: ${imagePath}`);
    }
    // Non-blocking existence check, consistent with validateModelFiles().
    if (!(await fse.pathExists(imagePath))) {
      throw new Error(`图片文件不存在: ${imagePath}`);
    }

    try {
      console.log(`\n🎯 开始OCR识别: ${path.basename(imagePath)}`);
      const startTime = Date.now();

      const preprocessResult = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
      const { processedImage } = preprocessResult;
      const textBoxes = await this.detectionProcessor.detectText(processedImage);
      const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(processedImage, textBoxes);

      // Measured before post-processing: covers preprocess + detect + recognize only.
      const processingTime = Date.now() - startTime;

      const textBlocks = this.textPostProcessor.buildTextBlocks(recognitionResults);
      const imageInfo = await this.imagePreprocessor.getImageInfo(imagePath);
      const rawText = textBlocks.map(block => block.content).join('\n');
      const overallConfidence = this.textPostProcessor.calculateOverallConfidence(recognitionResults);

      const result = {
        textBlocks,
        confidence: overallConfidence,
        processingTime,
        isOffline: true,
        imagePath,
        totalPages: 1,
        rawText,
        imageInfo,
        recognitionCount: recognitionResults.length
      };

      console.log(`\n📊 OCR识别统计:`);
      console.log(` - 处理时间: ${processingTime}ms`);
      console.log(` - 检测区域: ${textBoxes.length} 个`);
      console.log(` - 成功识别: ${recognitionResults.length} 个`);
      console.log(` - 总体置信度: ${overallConfidence.toFixed(4)}`);
      console.log(` - 最终文本长度: ${rawText.length} 字符`);

      return result;
    } catch (error) {
      console.error(`❌ OCR识别失败: ${error.message}`);
      // Keep the original error (and its stack) reachable through `cause`.
      throw new Error(`OCR识别失败: ${error.message}`, { cause: error });
    }
  }

  /**
   * Describes the current state of the manager.
   *
   * The `config` section reflects the configuration the manager was actually
   * initialized with, falling back to the defaults before initialization.
   *
   * @returns {object} Status snapshot (engine, model paths relative to the
   *   working directory, character-set size, thresholds, backend).
   */
  getStatus() {
    const effectiveConfig = this.activeConfig ?? this.defaultConfig;

    return {
      isInitialized: this.isInitialized,
      isOffline: true,
      engine: 'PP-OCRv3 (ONNX Runtime)',
      version: '1.0.0',
      models: {
        detection: path.relative(process.cwd(), this.detModelPath),
        recognition: path.relative(process.cwd(), this.recModelPath),
        classification: path.relative(process.cwd(), this.clsModelPath),
        characterSet: this.recognitionProcessor.getCharacterSetSize()
      },
      config: {
        detThresh: effectiveConfig.detThresh,
        detBoxThresh: effectiveConfig.detBoxThresh,
        clsThresh: effectiveConfig.clsThresh,
        preprocessing: 'enabled with padding'
      },
      backend: 'CPU'
    };
  }

  /**
   * Releases every session currently held and clears the references.
   *
   * Release failures are logged rather than thrown so that one failing
   * session cannot prevent the others from being released.
   *
   * @returns {Promise<void>}
   */
  async releaseSessions() {
    const sessions = [this.detSession, this.recSession, this.clsSession].filter(Boolean);
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;

    const outcomes = await Promise.allSettled(
      sessions.map(async (session) => session.release())
    );
    for (const outcome of outcomes) {
      if (outcome.status === 'rejected') {
        console.error('⚠️ 释放OCR会话失败:', outcome.reason);
      }
    }
  }

  /**
   * Shuts the manager down: marks it uninitialized and waits until all
   * inference sessions have been released. `initialize()` may be called
   * again afterwards.
   *
   * @returns {Promise<void>}
   */
  async terminate() {
    this.isInitialized = false;
    this.activeConfig = null;
    await this.releaseSessions();
    console.log('🛑 OCR管理器已终止');
  }
}
/**
 * Module-level OCR manager instance, created once when this module is first
 * evaluated and exposed as the module's default export.
 */
const onnxOcrManager = new OnnxOcrManager();

export { onnxOcrManager as default };
|