// server/utils/recognitionProcessor.js
  import TextDirectionClassifier from './textDirectionClassifier.js';
  import TextRecognizer from './textRecognizer.js';
  import TextRegionCropper from './textRegionCropper.js';
  // Recognizer confidence a region must exceed (strictly) to be kept in the results.
  const MIN_RECOGNITION_CONFIDENCE = 0.05;

  /**
   * Runs the per-region recognition pipeline over detected text boxes:
   * crop the region, classify its direction, rotate it by 180° when the
   * classifier is confident enough, then run text recognition on it.
   *
   * The actual work is delegated to TextRegionCropper, TextDirectionClassifier
   * and TextRecognizer; this class only wires them together and converts the
   * surviving boxes back to original-image coordinates.
   */
  class RecognitionProcessor {
    constructor() {
      this.recSession = null;
      this.clsSession = null;
      this.config = null;
      this.textDirectionClassifier = new TextDirectionClassifier();
      this.textRecognizer = new TextRecognizer();
      this.textRegionCropper = new TextRegionCropper();
    }

    /**
     * Stores the sessions and config and forwards them to the collaborators.
     *
     * @param {*} recSession - Recognition session, passed to the text recognizer.
     * @param {*} clsSession - Classification session, passed to the direction classifier.
     * @param {object} config - Shared configuration; `config.clsThresh` is read
     *   by `recognizeTextWithCls` as the rotation threshold.
     */
    initialize(recSession, clsSession, config) {
      this.recSession = recSession;
      this.clsSession = clsSession;
      this.config = config;
      this.textDirectionClassifier.initialize(clsSession, config);
      this.textRecognizer.initialize(recSession, config);
    }

    /**
     * Loads the recognizer's character set (delegates to the text recognizer).
     *
     * @param {string} keysPath - Location of the character-set file.
     * @returns {Promise<void>}
     */
    async loadCharacterSet(keysPath) {
      await this.textRecognizer.loadCharacterSet(keysPath);
    }

    /**
     * @returns {*} Whatever the text recognizer reports as its character-set size.
     */
    getCharacterSetSize() {
      return this.textRecognizer.getCharacterSetSize();
    }

    /**
     * Recognizes the text inside every detected box, one box at a time.
     *
     * A failure while handling one box is logged and that box is skipped; the
     * remaining boxes are still processed. Only a failure outside the per-box
     * handling (e.g. `textBoxes` is not array-like) is logged and rethrown.
     *
     * @param {object} processedImage - Preprocessed image; `buffer` is handed to
     *   the cropper, the scale/padding/original-size fields are used by
     *   `scaleBoxToOriginalImage`.
     * @param {Array<object>} textBoxes - Detected boxes (`x1`, `y1`, `x3`, `y3`
     *   and, optionally, `confidence`).
     * @returns {Promise<Array<object>>} One entry per accepted region with
     *   `text`, `confidence` (recognizer confidence × classifier confidence),
     *   `box` (original-image coordinates), `clsResult`, `clsConfidence` and the
     *   1-based `regionIndex`.
     */
    async recognizeTextWithCls(processedImage, textBoxes) {
      const results = [];

      try {
        console.log(`🔄 开始处理 ${textBoxes.length} 个文本区域`);

        for (let i = 0; i < textBoxes.length; i++) {
          const box = textBoxes[i];
          const regionNumber = i + 1;

          try {
            // The detection confidence is only used for this log line, so a box
            // without a numeric confidence must not abort the whole region.
            const detectionConfidence = Number.isFinite(box.confidence)
              ? box.confidence.toFixed(4)
              : 'N/A';
            console.log(`\n📦 处理区域 ${regionNumber}/${textBoxes.length}, 置信度: ${detectionConfidence}`);

            const textRegion = await this.textRegionCropper.cropTextRegion(
              processedImage.buffer, box, regionNumber
            );
            if (!textRegion) {
              console.log(`⏭️ 区域 ${regionNumber}: 跳过无效区域`);
              continue;
            }

            const { clsResult, clsConfidence } = await this.textDirectionClassifier.classifyTextDirection(
              textRegion.buffer
            );

            // Only flip the crop when the classifier says "upside down" with
            // enough confidence; otherwise recognize the crop as-is.
            let recognitionImage = textRegion.buffer;
            if (clsResult === 180 && clsConfidence > this.config.clsThresh) {
              console.log(`🔄 区域 ${regionNumber}: 旋转 180°`);
              recognitionImage = await this.textRegionCropper.rotateImage(textRegion.buffer, 180);
            }

            const textResult = await this.textRecognizer.recognizeText(recognitionImage, regionNumber);
            const trimmedText = textResult.text ? textResult.text.trim() : '';

            if (trimmedText.length > 0 && textResult.confidence > MIN_RECOGNITION_CONFIDENCE) {
              results.push({
                text: trimmedText,
                confidence: textResult.confidence * clsConfidence,
                box: this.scaleBoxToOriginalImage(box, processedImage),
                clsResult,
                clsConfidence,
                regionIndex: regionNumber
              });
              console.log(`✅ 区域 ${regionNumber}: 识别成功 "${textResult.text}"`);
            } else {
              console.log(`❌ 区域 ${regionNumber}: 识别失败或置信度过低`);
            }
          } catch (error) {
            // Best effort: one bad region must not discard the others.
            console.error(`💥 区域 ${regionNumber}: 处理失败`, error.message);
          }
        }

        console.log(`\n🎯 识别完成: ${results.length}/${textBoxes.length} 个区域成功`);
        return results;
      } catch (error) {
        console.error('❌ 整体识别失败:', error);
        throw error;
      }
    }

    /**
     * Converts a detected box to original-image coordinates.
     *
     * Corners 1 and 3 of the box are multiplied by `scaleX`/`scaleY`, shifted
     * back by `paddingX`/`paddingY`, and clamped to
     * `[0, originalWidth - 1]` × `[0, originalHeight - 1]`. The result is the
     * axis-aligned rectangle spanned by those two corners, listed as four
     * points (1 → 2 → 3 → 4: corner 1, then corner 3's x with corner 1's y,
     * corner 3, then corner 1's x with corner 3's y).
     *
     * @param {object} box - Box with `x1`, `y1`, `x3`, `y3` and `confidence`.
     * @param {object} processedImage - Provides `scaleX`, `scaleY`, `paddingX`,
     *   `paddingY`, `originalWidth` and `originalHeight`.
     * @returns {object} `{ x1, y1, x2, y2, x3, y3, x4, y4, confidence }`.
     */
    scaleBoxToOriginalImage(box, processedImage) {
      const {
        scaleX, scaleY,
        paddingX, paddingY,
        originalWidth, originalHeight
      } = processedImage;

      const clamp = (value, max) => Math.max(0, Math.min(max, value));
      const maxX = originalWidth - 1;
      const maxY = originalHeight - 1;

      // Each edge is shared by two corners, so compute it once.
      const xAtCorner1 = clamp(box.x1 * scaleX - paddingX, maxX);
      const yAtCorner1 = clamp(box.y1 * scaleY - paddingY, maxY);
      const xAtCorner3 = clamp(box.x3 * scaleX - paddingX, maxX);
      const yAtCorner3 = clamp(box.y3 * scaleY - paddingY, maxY);

      return {
        x1: xAtCorner1,
        y1: yAtCorner1,
        x2: xAtCorner3,
        y2: yAtCorner1,
        x3: xAtCorner3,
        y3: yAtCorner3,
        x4: xAtCorner1,
        y4: yAtCorner3,
        confidence: box.confidence
      };
    }
  }

  export default RecognitionProcessor;