// Source repository: xuqinmin12/Electron-vue3-ts-offline
							// server/utils/onnxOcrManager.js
import { InferenceSession } from 'onnxruntime-node';
import fse from 'fs-extra';
import * as path from 'path';
import { fileURLToPath } from 'url';

import DetectionProcessor from './detectionProcessor.js';
import RecognitionProcessor from './recognitionProcessor.js';
import ImagePreprocessor from './imagePreprocessor.js';
import TextPostProcessor from './textPostProcessor.js';

// ES modules do not provide __filename / __dirname, so derive both from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Coordinates an offline OCR pipeline on top of ONNX Runtime.
 *
 * The manager owns three inference sessions (detection, recognition and
 * classification) plus the helper processors that consume them, and exposes
 * a small lifecycle: `initialize()` -> `recognizeImage()` -> `terminate()`.
 */
class OnnxOcrManager {
    /**
     * Sets up model paths, helper processors and the default configuration.
     * No model is loaded here; that happens in `initialize()`.
     */
    constructor() {
        this.detSession = null;
        this.recSession = null;
        this.clsSession = null;
        this.isInitialized = false;

        // In-flight initialization, shared by concurrent `initialize()` callers.
        this.initPromise = null;
        // Configuration actually handed to the processors (defaults + overrides).
        this.activeConfig = null;

        // Model assets are resolved relative to the process working directory.
        this.modelDir = path.join(process.cwd(), 'models', 'ocr');
        this.detModelPath = path.join(this.modelDir, 'Det', '中文_OCRv3.onnx');
        this.recModelPath = path.join(this.modelDir, 'Rec', '中文简体_OCRv3.onnx');
        this.clsModelPath = path.join(this.modelDir, 'Cls', '原始分类器模型.onnx');
        this.keysPath = path.join(this.modelDir, 'Keys', '中文简体_OCRv3.txt');

        this.detectionProcessor = new DetectionProcessor();
        this.recognitionProcessor = new RecognitionProcessor();
        this.imagePreprocessor = new ImagePreprocessor();
        this.textPostProcessor = new TextPostProcessor();

        // Default configuration, tuned to improve recognition results.
        this.defaultConfig = {
            language: 'ch',
            detLimitSideLen: 960,
            detThresh: 0.05,        // lowered detection threshold
            detBoxThresh: 0.1,      // lowered box threshold
            detUnclipRatio: 1.8,    // adjusted unclip ratio
            maxTextLength: 50,      // increased maximum text length
            recImageHeight: 48,
            clsThresh: 0.8,         // lowered classification threshold
            minTextHeight: 2,       // lowered minimum text height
            minTextWidth: 2,        // lowered minimum text width
            clusterDistance: 8,     // adjusted clustering distance
            minClusterPoints: 2     // lowered minimum cluster points
        };
    }

    /**
     * Loads the character set and the three ONNX models, then wires the
     * sessions into the detection / recognition processors.
     *
     * Calling this while already initialized is a no-op. Calls that overlap
     * an in-flight initialization wait for that same run instead of creating
     * a second set of sessions; the overrides of the first caller win.
     *
     * @param {object} [config={}] Overrides merged on top of `defaultConfig`.
     * @returns {Promise<void>}
     * @throws {Error} When a required file is missing or a model fails to load.
     */
    async initialize(config = {}) {
        if (this.isInitialized) {
            console.log('🔁 OCR管理器已初始化');
            return;
        }

        if (this.initPromise) {
            await this.initPromise;
            return;
        }

        this.initPromise = this._runInitialization(config);
        try {
            await this.initPromise;
        } finally {
            // Clear the marker so a failed initialization can be retried.
            this.initPromise = null;
        }
    }

    /**
     * Performs one initialization attempt. Sessions are only published on
     * `this` once every step succeeded; on failure they are released.
     *
     * @param {object} config Overrides merged on top of `defaultConfig`.
     * @returns {Promise<void>}
     */
    async _runInitialization(config) {
        let sessions = [];

        try {
            console.log('🚀 开始初始化OCR管理器...');
            await this.validateModelFiles();
            await this.recognitionProcessor.loadCharacterSet(this.keysPath);

            sessions = await this._createSessions([
                this.detModelPath,
                this.recModelPath,
                this.clsModelPath
            ]);
            const [detSession, recSession, clsSession] = sessions;

            const mergedConfig = { ...this.defaultConfig, ...config };

            this.detectionProcessor.initialize(detSession, mergedConfig);
            this.recognitionProcessor.initialize(recSession, clsSession, mergedConfig);

            this.detSession = detSession;
            this.recSession = recSession;
            this.clsSession = clsSession;
            this.activeConfig = mergedConfig;

            this.isInitialized = true;
            console.log('✅ OCR管理器初始化完成');
        } catch (error) {
            // Do not leak sessions created before a later step failed.
            await this._releaseSessions(sessions);
            console.error('❌ OCR管理器初始化失败:', error);
            throw error;
        }
    }

    /**
     * Creates one CPU inference session per model path, in parallel.
     * If any creation fails, the sessions that did get created are released
     * before the first failure is rethrown.
     *
     * @param {string[]} modelPaths Model files to load.
     * @returns {Promise<object[]>} Sessions in the same order as `modelPaths`.
     */
    async _createSessions(modelPaths) {
        const outcomes = await Promise.allSettled(
            modelPaths.map(async (modelPath) =>
                InferenceSession.create(modelPath, { executionProviders: ['cpu'] })
            )
        );

        const failure = outcomes.find(({ status }) => status === 'rejected');
        if (!failure) {
            return outcomes.map(({ value }) => value);
        }

        const created = outcomes
            .filter(({ status }) => status === 'fulfilled')
            .map(({ value }) => value);
        await this._releaseSessions(created);
        throw failure.reason;
    }

    /**
     * Releases every non-null session and waits for all releases to settle.
     *
     * @param {Array<object|null>} sessions Sessions to release.
     * @returns {Promise<PromiseSettledResult<void>[]>} One outcome per released session.
     */
    _releaseSessions(sessions) {
        // The async wrapper turns a synchronous throw from release() into a rejection.
        return Promise.allSettled(
            sessions.filter(Boolean).map(async (session) => session.release())
        );
    }

    /**
     * Verifies that the three model files and the character-set file exist.
     *
     * @returns {Promise<void>}
     * @throws {Error} For the first missing file; the message carries its path and label.
     */
    async validateModelFiles() {
        const requiredFiles = [
            { path: this.detModelPath, name: '检测模型' },
            { path: this.recModelPath, name: '识别模型' },
            { path: this.clsModelPath, name: '分类模型' },
            { path: this.keysPath, name: '字符集文件' }
        ];

        for (const { path: filePath, name } of requiredFiles) {
            const exists = await fse.pathExists(filePath);
            if (!exists) {
                // Original prefix kept; the label tells which asset is missing.
                throw new Error(`模型文件不存在: ${filePath} (${name})`);
            }
        }
        console.log('✅ 所有模型文件验证通过');
    }

    /**
     * Runs the full pipeline (preprocess -> detect -> recognize -> post-process)
     * on a single image file.
     *
     * The argument is validated before anything else, so an invalid path fails
     * without triggering model loading. If the manager is not initialized yet
     * it is initialized with `config`; `config` is also forwarded to the image
     * preprocessor on every call.
     *
     * @param {string} imagePath Path of the image to recognize.
     * @param {object} [config={}] Per-call options (see above).
     * @returns {Promise<object>} Result with `textBlocks`, `confidence`,
     *   `processingTime` (ms), `isOffline`, `imagePath`, `totalPages`,
     *   `rawText`, `imageInfo` and `recognitionCount`.
     * @throws {Error} When the path is invalid or missing, initialization fails,
     *   or any pipeline stage throws (wrapped, with the original as `cause`).
     */
    async recognizeImage(imagePath, config = {}) {
        if (!imagePath || typeof imagePath !== 'string') {
            throw new Error(`无效的图片路径: ${imagePath}`);
        }

        if (!(await fse.pathExists(imagePath))) {
            throw new Error(`图片文件不存在: ${imagePath}`);
        }

        if (!this.isInitialized) {
            await this.initialize(config);
        }

        try {
            console.log(`\n🎯 开始OCR识别: ${path.basename(imagePath)}`);
            const startTime = Date.now();

            const preprocessResult = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
            const { processedImage } = preprocessResult;

            const textBoxes = await this.detectionProcessor.detectText(processedImage);
            const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(processedImage, textBoxes);

            // Timing covers preprocessing, detection and recognition only.
            const processingTime = Date.now() - startTime;

            const textBlocks = this.textPostProcessor.buildTextBlocks(recognitionResults);
            const imageInfo = await this.imagePreprocessor.getImageInfo(imagePath);

            const rawText = textBlocks.map((block) => block.content).join('\n');
            const overallConfidence = this.textPostProcessor.calculateOverallConfidence(recognitionResults);

            const result = {
                textBlocks,
                confidence: overallConfidence,
                processingTime,
                isOffline: true,
                imagePath,
                totalPages: 1,
                rawText,
                imageInfo,
                recognitionCount: recognitionResults.length
            };

            console.log(`\n📊 OCR识别统计:`);
            console.log(`   - 处理时间: ${processingTime}ms`);
            console.log(`   - 检测区域: ${textBoxes.length} 个`);
            console.log(`   - 成功识别: ${recognitionResults.length} 个`);
            console.log(`   - 总体置信度: ${overallConfidence.toFixed(4)}`);
            console.log(`   - 最终文本长度: ${rawText.length} 字符`);

            return result;
        } catch (error) {
            console.error(`❌ OCR识别失败: ${error.message}`);
            // Keep the original error (and its stack) reachable through `cause`.
            throw new Error(`OCR识别失败: ${error.message}`, { cause: error });
        }
    }

    /**
     * Returns a snapshot describing the engine, model locations (relative to
     * the working directory) and the thresholds in effect. Thresholds come
     * from the configuration used at initialization, or from the defaults
     * when the manager is not initialized.
     *
     * @returns {object} Status object.
     */
    getStatus() {
        const effectiveConfig = this.activeConfig ?? this.defaultConfig;

        return {
            isInitialized: this.isInitialized,
            isOffline: true,
            engine: 'PP-OCRv3 (ONNX Runtime)',
            version: '1.0.0',
            models: {
                detection: path.relative(process.cwd(), this.detModelPath),
                recognition: path.relative(process.cwd(), this.recModelPath),
                classification: path.relative(process.cwd(), this.clsModelPath),
                characterSet: this.recognitionProcessor.getCharacterSetSize()
            },
            config: {
                detThresh: effectiveConfig.detThresh,
                detBoxThresh: effectiveConfig.detBoxThresh,
                clsThresh: effectiveConfig.clsThresh,
                preprocessing: 'enabled with padding'
            },
            backend: 'CPU'
        };
    }

    /**
     * Releases all inference sessions and marks the manager as uninitialized.
     * State is reset before the releases are awaited, so the manager never
     * keeps a reference to a session that is being torn down.
     *
     * @returns {Promise<void>}
     * @throws {*} The first release failure, after every release was attempted.
     */
    async terminate() {
        const sessions = [this.detSession, this.recSession, this.clsSession];

        this.detSession = null;
        this.recSession = null;
        this.clsSession = null;
        this.activeConfig = null;
        this.isInitialized = false;

        const outcomes = await this._releaseSessions(sessions);
        const failure = outcomes.find(({ status }) => status === 'rejected');
        if (failure) {
            throw failure.reason;
        }

        console.log('🛑 OCR管理器已终止');
    }
}

// Module-level singleton: every importer of this module shares the same manager instance.
const onnxOcrManager = new OnnxOcrManager();

export { onnxOcrManager as default };