Electron-vue3-ts-offline/server/utils/onnxOcrManager.js
2025-11-13 16:34:41 +08:00

201 行
7.6 KiB
JavaScript

// server/utils/onnxOcrManager.js
import { InferenceSession } from 'onnxruntime-node';
import fse from 'fs-extra';
import * as path from 'path';
import { fileURLToPath } from 'url';
import DetectionProcessor from './detectionProcessor.js';
import RecognitionProcessor from './recognitionProcessor.js';
import ImagePreprocessor from './imagePreprocessor.js';
import TextPostProcessor from './textPostProcessor.js';
// ES modules do not provide a built-in __dirname, so derive this file's
// directory from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Owns the three ONNX Runtime sessions (detection, recognition, classification)
 * used by the offline OCR pipeline and wires them into the detection /
 * recognition processors.
 *
 * Lifecycle: `initialize()` -> any number of `recognizeImage()` calls ->
 * `terminate()`. `recognizeImage()` initializes lazily when needed.
 */
class OnnxOcrManager {
  constructor() {
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;
    this.isInitialized = false;

    // Promise of the initialization currently in flight (if any), so that
    // concurrent callers share one model load instead of each creating sessions.
    this.initPromise = null;
    // Defaults merged with caller overrides, set by the last successful initialize().
    this.activeConfig = null;

    // Model assets are resolved relative to the process working directory.
    this.modelDir = path.join(process.cwd(), 'models', 'ocr');
    this.detModelPath = path.join(this.modelDir, 'Det', '中文_OCRv3.onnx');
    this.recModelPath = path.join(this.modelDir, 'Rec', '中文简体_OCRv3.onnx');
    this.clsModelPath = path.join(this.modelDir, 'Cls', '原始分类器模型.onnx');
    this.keysPath = path.join(this.modelDir, 'Keys', '中文简体_OCRv3.txt');

    this.detectionProcessor = new DetectionProcessor();
    this.recognitionProcessor = new RecognitionProcessor();
    this.imagePreprocessor = new ImagePreprocessor();
    this.textPostProcessor = new TextPostProcessor();

    // Updated default configuration, tuned for better recognition results.
    this.defaultConfig = {
      language: 'ch',
      detLimitSideLen: 960,
      detThresh: 0.05, // lowered detection threshold
      detBoxThresh: 0.1, // lowered box threshold
      detUnclipRatio: 1.8, // adjusted unclip ratio
      maxTextLength: 50, // increased maximum text length
      recImageHeight: 48,
      clsThresh: 0.8, // lowered classification threshold
      minTextHeight: 2, // lowered minimum text height
      minTextWidth: 2, // lowered minimum text width
      clusterDistance: 8, // adjusted cluster distance
      minClusterPoints: 2 // lowered minimum cluster points
    };
  }

  /**
   * Loads the models and prepares the processors. Safe to call repeatedly and
   * concurrently: once initialized it returns immediately, and while an
   * initialization is in flight every caller awaits that same attempt (the
   * `config` of later concurrent callers is ignored).
   *
   * @param {object} [config] - Overrides merged on top of `defaultConfig`.
   * @returns {Promise<void>}
   * @throws Rethrows whatever made initialization fail (missing file, model load error, ...).
   */
  async initialize(config = {}) {
    if (this.isInitialized) {
      console.log('🔁 OCR管理器已初始化');
      return;
    }
    if (!this.initPromise) {
      this.initPromise = this._initializeOnce(config).finally(() => {
        // Clear the marker so a failed attempt can be retried later.
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }

  /**
   * Performs one initialization attempt. On any failure every session that was
   * already created is released before the error is rethrown, so a failed
   * attempt does not leak native sessions.
   *
   * @private
   * @param {object} config - Overrides merged on top of `defaultConfig`.
   * @returns {Promise<void>}
   */
  async _initializeOnce(config) {
    try {
      console.log('🚀 开始初始化OCR管理器...');
      await this.validateModelFiles();
      await this.recognitionProcessor.loadCharacterSet(this.keysPath);

      const createSession = async (modelPath) =>
        InferenceSession.create(modelPath, { executionProviders: ['cpu'] });

      // allSettled (not all): if one model fails we still need handles to the
      // ones that loaded so they can be released.
      const outcomes = await Promise.allSettled([
        createSession(this.detModelPath),
        createSession(this.recModelPath),
        createSession(this.clsModelPath)
      ]);
      [this.detSession, this.recSession, this.clsSession] = outcomes.map(
        (outcome) => (outcome.status === 'fulfilled' ? outcome.value : null)
      );
      const failed = outcomes.find((outcome) => outcome.status === 'rejected');
      if (failed) {
        throw failed.reason;
      }

      const mergedConfig = { ...this.defaultConfig, ...config };
      this.detectionProcessor.initialize(this.detSession, mergedConfig);
      this.recognitionProcessor.initialize(this.recSession, this.clsSession, mergedConfig);

      this.activeConfig = mergedConfig;
      this.isInitialized = true;
      console.log('✅ OCR管理器初始化完成');
    } catch (error) {
      console.error('❌ OCR管理器初始化失败:', error);
      await this._releaseSessions();
      throw error;
    }
  }

  /**
   * Releases every session currently held and clears the references.
   * Never throws; release failures are collected and returned instead.
   *
   * @private
   * @returns {Promise<Array<*>>} Rejection reasons of the releases that failed (empty when all succeeded).
   */
  async _releaseSessions() {
    const sessions = [this.detSession, this.recSession, this.clsSession].filter(Boolean);
    this.detSession = null;
    this.recSession = null;
    this.clsSession = null;

    // The async wrapper turns a synchronous throw from release() into a
    // rejection so one bad session cannot prevent the others from being released.
    const outcomes = await Promise.allSettled(
      sessions.map(async (session) => session.release())
    );
    return outcomes
      .filter((outcome) => outcome.status === 'rejected')
      .map((outcome) => outcome.reason);
  }

  /**
   * Verifies that the three model files and the character-set file exist.
   *
   * @returns {Promise<void>}
   * @throws {Error} For the first required file (in det, rec, cls, keys order) that is missing.
   */
  async validateModelFiles() {
    const requiredFiles = [
      { path: this.detModelPath, name: '检测模型' },
      { path: this.recModelPath, name: '识别模型' },
      { path: this.clsModelPath, name: '分类模型' },
      { path: this.keysPath, name: '字符集文件' }
    ];
    for (const { path: filePath, name } of requiredFiles) {
      const exists = await fse.pathExists(filePath);
      if (!exists) {
        // The original prefix and path are kept; the asset name is appended so
        // the message says which of the four files is missing.
        throw new Error(`模型文件不存在: ${filePath} (${name})`);
      }
    }
    console.log('✅ 所有模型文件验证通过');
  }

  /**
   * Runs the full OCR pipeline on one image file: preprocess, detect text
   * boxes, recognize (with classification), then post-process into text blocks.
   *
   * The image path is validated before the (expensive) lazy initialization.
   * `config` is always forwarded to image preprocessing; it is only passed to
   * `initialize()` when this call is the one that triggers initialization.
   *
   * @param {string} imagePath - Path of the image file to recognize.
   * @param {object} [config] - Per-call options (see above).
   * @returns {Promise<object>} Result with `textBlocks`, `confidence`,
   *   `processingTime` (ms, preprocessing through recognition), `isOffline`,
   *   `imagePath`, `totalPages`, `rawText`, `imageInfo`, `recognitionCount`.
   * @throws {Error} If the path is invalid or missing, if initialization
   *   fails, or (prefixed with "OCR识别失败", original error in `cause`) if
   *   any pipeline stage fails.
   */
  async recognizeImage(imagePath, config = {}) {
    if (!imagePath || typeof imagePath !== 'string') {
      throw new Error(`无效的图片路径: ${imagePath}`);
    }
    // Async existence check: avoids blocking the event loop inside an async method.
    if (!(await fse.pathExists(imagePath))) {
      throw new Error(`图片文件不存在: ${imagePath}`);
    }
    if (!this.isInitialized) {
      await this.initialize(config);
    }

    try {
      console.log(`\n🎯 开始OCR识别: ${path.basename(imagePath)}`);
      const startTime = Date.now();

      const { processedImage } = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
      const textBoxes = await this.detectionProcessor.detectText(processedImage);
      const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(processedImage, textBoxes);
      // Timing intentionally stops here, before post-processing and image-info lookup.
      const processingTime = Date.now() - startTime;

      const textBlocks = this.textPostProcessor.buildTextBlocks(recognitionResults);
      const imageInfo = await this.imagePreprocessor.getImageInfo(imagePath);
      const rawText = textBlocks.map((block) => block.content).join('\n');
      const overallConfidence = this.textPostProcessor.calculateOverallConfidence(recognitionResults);

      const result = {
        textBlocks,
        confidence: overallConfidence,
        processingTime,
        isOffline: true,
        imagePath,
        totalPages: 1,
        rawText,
        imageInfo,
        recognitionCount: recognitionResults.length
      };

      console.log(`\n📊 OCR识别统计:`);
      console.log(` - 处理时间: ${processingTime}ms`);
      console.log(` - 检测区域: ${textBoxes.length}`);
      console.log(` - 成功识别: ${recognitionResults.length}`);
      console.log(` - 总体置信度: ${overallConfidence.toFixed(4)}`);
      console.log(` - 最终文本长度: ${rawText.length} 字符`);
      return result;
    } catch (error) {
      console.error(`❌ OCR识别失败: ${error.message}`);
      // Keep the original error (and its stack) reachable through `cause`.
      throw new Error(`OCR识别失败: ${error.message}`, { cause: error });
    }
  }

  /**
   * Returns a snapshot describing the engine, model locations (relative to the
   * working directory), character-set size and thresholds. The thresholds come
   * from the configuration applied by the last successful `initialize()`,
   * falling back to `defaultConfig` when none is active.
   *
   * @returns {object} Status object.
   */
  getStatus() {
    const effectiveConfig = this.activeConfig ?? this.defaultConfig;
    return {
      isInitialized: this.isInitialized,
      isOffline: true,
      engine: 'PP-OCRv3 (ONNX Runtime)',
      version: '1.0.0',
      models: {
        detection: path.relative(process.cwd(), this.detModelPath),
        recognition: path.relative(process.cwd(), this.recModelPath),
        classification: path.relative(process.cwd(), this.clsModelPath),
        characterSet: this.recognitionProcessor.getCharacterSetSize()
      },
      config: {
        detThresh: effectiveConfig.detThresh,
        detBoxThresh: effectiveConfig.detBoxThresh,
        clsThresh: effectiveConfig.clsThresh,
        preprocessing: 'enabled with padding'
      },
      backend: 'CPU'
    };
  }

  /**
   * Releases all sessions and returns the manager to the uninitialized state.
   * Every release is awaited; the state is reset even when a release fails.
   *
   * @returns {Promise<void>}
   * @throws Rethrows the first release failure, after all sessions were attempted
   *   and the state was reset.
   */
  async terminate() {
    const failures = await this._releaseSessions();
    this.isInitialized = false;
    this.activeConfig = null;
    console.log('🛑 OCR管理器已终止');

    if (failures.length > 0) {
      for (const failure of failures) {
        console.error('❌ OCR会话释放失败:', failure);
      }
      throw failures[0];
    }
  }
}
// Module-level shared instance: the manager is constructed once when this
// module is first evaluated, and every importer of the default export
// receives this same object.
const onnxOcrManager = new OnnxOcrManager();
export default onnxOcrManager;