onnxOcrManager.js 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. // server/utils/onnxOcrManager.js
  2. import { InferenceSession } from 'onnxruntime-node';
  3. import fse from 'fs-extra';
  4. import * as path from 'path';
  5. import { fileURLToPath } from 'url';
  6. import DetectionProcessor from './detectionProcessor.js';
  7. import RecognitionProcessor from './recognitionProcessor.js';
  8. import ImagePreprocessor from './imagePreprocessor.js';
  9. import TextPostProcessor from './textPostProcessor.js';
  10. const __dirname = path.dirname(fileURLToPath(import.meta.url));
  11. class OnnxOcrManager {
  12. constructor() {
  13. this.detSession = null;
  14. this.recSession = null;
  15. this.clsSession = null;
  16. this.isInitialized = false;
  17. this.modelDir = path.join(process.cwd(), 'models', 'ocr');
  18. this.detModelPath = path.join(this.modelDir, 'Det', '中文_OCRv3.onnx');
  19. this.recModelPath = path.join(this.modelDir, 'Rec', '中文简体_OCRv3.onnx');
  20. this.clsModelPath = path.join(this.modelDir, 'Cls', '原始分类器模型.onnx');
  21. this.keysPath = path.join(this.modelDir, 'Keys', '中文简体_OCRv3.txt');
  22. this.detectionProcessor = new DetectionProcessor();
  23. this.recognitionProcessor = new RecognitionProcessor();
  24. this.imagePreprocessor = new ImagePreprocessor();
  25. this.textPostProcessor = new TextPostProcessor();
  26. // 更新默认配置,优化识别效果
  27. this.defaultConfig = {
  28. language: 'ch',
  29. detLimitSideLen: 960,
  30. detThresh: 0.05, // 降低检测阈值
  31. detBoxThresh: 0.1, // 降低框阈值
  32. detUnclipRatio: 1.8, // 调整解压缩比例
  33. maxTextLength: 50, // 增加最大文本长度
  34. recImageHeight: 48,
  35. clsThresh: 0.8, // 降低分类阈值
  36. minTextHeight: 2, // 降低最小文本高度
  37. minTextWidth: 2, // 降低最小文本宽度
  38. clusterDistance: 8, // 调整聚类距离
  39. minClusterPoints: 2 // 降低最小聚类点数
  40. };
  41. }
  42. async initialize(config = {}) {
  43. if (this.isInitialized) {
  44. console.log('🔁 OCR管理器已初始化');
  45. return;
  46. }
  47. try {
  48. console.log('🚀 开始初始化OCR管理器...');
  49. await this.validateModelFiles();
  50. await this.recognitionProcessor.loadCharacterSet(this.keysPath);
  51. const [detSession, recSession, clsSession] = await Promise.all([
  52. InferenceSession.create(this.detModelPath, { executionProviders: ['cpu'] }),
  53. InferenceSession.create(this.recModelPath, { executionProviders: ['cpu'] }),
  54. InferenceSession.create(this.clsModelPath, { executionProviders: ['cpu'] })
  55. ]);
  56. this.detSession = detSession;
  57. this.recSession = recSession;
  58. this.clsSession = clsSession;
  59. const mergedConfig = { ...this.defaultConfig, ...config };
  60. this.detectionProcessor.initialize(this.detSession, mergedConfig);
  61. this.recognitionProcessor.initialize(this.recSession, this.clsSession, mergedConfig);
  62. this.isInitialized = true;
  63. console.log('✅ OCR管理器初始化完成');
  64. } catch (error) {
  65. console.error('❌ OCR管理器初始化失败:', error);
  66. throw error;
  67. }
  68. }
  69. async validateModelFiles() {
  70. const requiredFiles = [
  71. { path: this.detModelPath, name: '检测模型' },
  72. { path: this.recModelPath, name: '识别模型' },
  73. { path: this.clsModelPath, name: '分类模型' },
  74. { path: this.keysPath, name: '字符集文件' }
  75. ];
  76. for (const { path: filePath, name } of requiredFiles) {
  77. const exists = await fse.pathExists(filePath);
  78. if (!exists) {
  79. throw new Error(`模型文件不存在: ${filePath}`);
  80. }
  81. }
  82. console.log('✅ 所有模型文件验证通过');
  83. }
  84. async recognizeImage(imagePath, config = {}) {
  85. if (!this.isInitialized) {
  86. await this.initialize(config);
  87. }
  88. if (!imagePath || typeof imagePath !== 'string') {
  89. throw new Error(`无效的图片路径: ${imagePath}`);
  90. }
  91. if (!fse.existsSync(imagePath)) {
  92. throw new Error(`图片文件不存在: ${imagePath}`);
  93. }
  94. try {
  95. console.log(`\n🎯 开始OCR识别: ${path.basename(imagePath)}`);
  96. const startTime = Date.now();
  97. const preprocessResult = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
  98. const { processedImage } = preprocessResult;
  99. const textBoxes = await this.detectionProcessor.detectText(processedImage);
  100. const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(processedImage, textBoxes);
  101. const processingTime = Date.now() - startTime;
  102. const textBlocks = this.textPostProcessor.buildTextBlocks(recognitionResults);
  103. const imageInfo = await this.imagePreprocessor.getImageInfo(imagePath);
  104. const rawText = textBlocks.map(block => block.content).join('\n');
  105. const overallConfidence = this.textPostProcessor.calculateOverallConfidence(recognitionResults);
  106. const result = {
  107. textBlocks,
  108. confidence: overallConfidence,
  109. processingTime,
  110. isOffline: true,
  111. imagePath,
  112. totalPages: 1,
  113. rawText,
  114. imageInfo,
  115. recognitionCount: recognitionResults.length
  116. };
  117. console.log(`\n📊 OCR识别统计:`);
  118. console.log(` - 处理时间: ${processingTime}ms`);
  119. console.log(` - 检测区域: ${textBoxes.length} 个`);
  120. console.log(` - 成功识别: ${recognitionResults.length} 个`);
  121. console.log(` - 总体置信度: ${overallConfidence.toFixed(4)}`);
  122. console.log(` - 最终文本长度: ${rawText.length} 字符`);
  123. return result;
  124. } catch (error) {
  125. console.error(`❌ OCR识别失败: ${error.message}`);
  126. throw new Error(`OCR识别失败: ${error.message}`);
  127. }
  128. }
  129. getStatus() {
  130. return {
  131. isInitialized: this.isInitialized,
  132. isOffline: true,
  133. engine: 'PP-OCRv3 (ONNX Runtime)',
  134. version: '1.0.0',
  135. models: {
  136. detection: path.relative(process.cwd(), this.detModelPath),
  137. recognition: path.relative(process.cwd(), this.recModelPath),
  138. classification: path.relative(process.cwd(), this.clsModelPath),
  139. characterSet: this.recognitionProcessor.getCharacterSetSize()
  140. },
  141. config: {
  142. detThresh: this.defaultConfig.detThresh,
  143. detBoxThresh: this.defaultConfig.detBoxThresh,
  144. clsThresh: this.defaultConfig.clsThresh,
  145. preprocessing: 'enabled with padding'
  146. },
  147. backend: 'CPU'
  148. };
  149. }
  150. async terminate() {
  151. if (this.detSession) {
  152. this.detSession.release();
  153. this.detSession = null;
  154. }
  155. if (this.recSession) {
  156. this.recSession.release();
  157. this.recSession = null;
  158. }
  159. if (this.clsSession) {
  160. this.clsSession.release();
  161. this.clsSession = null;
  162. }
  163. this.isInitialized = false;
  164. console.log('🛑 OCR管理器已终止');
  165. }
  166. }
  167. const onnxOcrManager = new OnnxOcrManager();
  168. export default onnxOcrManager;