// server/utils/detectionProcessor.js
import { Tensor } from 'onnxruntime-node';
import sharp from 'sharp';

class DetectionProcessor {
  constructor() {
    this.session = null;
    this.config = null;
  }

  initialize(session, config) {
    this.session = session;
    this.config = config;
  }

  async detectText(processedImage) {
    try {
      const inputTensor = await this.prepareDetectionInput(processedImage);
      const outputs = await this.session.run({
        [this.session.inputNames[0]]: inputTensor
      });
      return this.postprocessDetection(outputs, processedImage);
    } catch (error) {
      console.error('Text detection failed:', error);
      return [];
    }
  }

  async prepareDetectionInput(processedImage) {
    const { buffer, width, height } = processedImage;
    const imageData = await sharp(buffer)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const data = imageData.data;
    const channels = imageData.info.channels;
    const planeSize = height * width;
    const inputData = new Float32Array(3 * planeSize);

    // Convert interleaved HWC (RGBA) bytes to planar CHW floats in [0, 1],
    // dropping the alpha channel: plane c gets pixel p's c-th component.
    for (let p = 0; p < planeSize; p++) {
      for (let c = 0; c < 3; c++) {
        inputData[c * planeSize + p] = data[p * channels + c] / 255.0;
      }
    }

    return new Tensor('float32', inputData, [1, 3, height, width]);
  }
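  // Post-processing of the detection probability map, step by step:
  //   1. Threshold the per-pixel text probabilities (detThresh, default 0.05,
  //      kept deliberately low to favor recall).
  //   2. Group the surviving pixels into clusters with enhancedCluster().
  //   3. Take each cluster's axis-aligned bounding box and filter it by
  //      minimum size and aspect ratio.
  //   4. Keep boxes whose mean probability clears detBoxThresh (default 0.1),
  //      then sort the survivors by confidence, highest first.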
  postprocessDetection(outputs, processedImage) {
    try {
      const boxes = [];
      const outputNames = this.session.outputNames;
      const detectionOutput = outputs[outputNames[0]];

      if (!detectionOutput) {
        return boxes;
      }

      const [, , height, width] = detectionOutput.dims;
      const data = detectionOutput.data;

      // Lowered detection threshold to improve recall.
      const threshold = this.config.detThresh || 0.05;
      const points = [];

      // Improved point collection: record every pixel above the threshold,
      // tagged with whether it is a local maximum (currently informational only).
      for (let y = 0; y < height; y++) {
        for (let x = 0; x < width; x++) {
          const idx = y * width + x;
          const prob = data[idx];
          if (prob > threshold) {
            points.push({
              x,
              y,
              prob,
              localMax: this.isLocalMaximum(data, x, y, width, height, 2)
            });
          }
        }
      }

      if (points.length === 0) {
        return boxes;
      }

      // Improved clustering algorithm.
      const clusters = this.enhancedCluster(points, 8);

      for (const cluster of clusters) {
        // Lowered minimum point-count requirement.
        if (cluster.length < 2) continue;

        const minX = Math.min(...cluster.map(p => p.x));
        const maxX = Math.max(...cluster.map(p => p.x));
        const minY = Math.min(...cluster.map(p => p.y));
        const maxY = Math.max(...cluster.map(p => p.y));

        const boxWidth = maxX - minX;
        const boxHeight = maxY - minY;

        // Relaxed size limits.
        if (boxWidth < 2 || boxHeight < 2) continue;

        // Relaxed aspect-ratio limits.
        const aspectRatio = boxWidth / boxHeight;
        if (aspectRatio > 100 || aspectRatio < 0.01) continue;

        const avgConfidence =
          cluster.reduce((sum, p) => sum + p.prob, 0) / cluster.length;

        // Lowered box-confidence threshold.
        const boxThreshold = this.config.detBoxThresh || 0.1;
        if (avgConfidence > boxThreshold) {
          const box = this.scaleBoxToProcessedImage(
            {
              x1: minX, y1: minY,
              x2: maxX, y2: minY,
              x3: maxX, y3: maxY,
              x4: minX, y4: maxY
            },
            processedImage
          );
          box.confidence = avgConfidence;
          boxes.push(box);
        }
      }

      boxes.sort((a, b) => b.confidence - a.confidence);
      console.log(`✅ Detected ${boxes.length} text region(s)`);
      return boxes;
    } catch (error) {
      console.error('Detection post-processing error:', error);
      return [];
    }
  }

  // Local-maximum check: true if no neighbor within `radius` has a higher
  // probability than the center pixel.
  isLocalMaximum(data, x, y, width, height, radius) {
    const centerProb = data[y * width + x];
    for (let dy = -radius; dy <= radius; dy++) {
      for (let dx = -radius; dx <= radius; dx++) {
        if (dx === 0 && dy === 0) continue;
        const nx = x + dx;
        const ny = y + dy;
        if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
          if (data[ny * width + nx] > centerProb) {
            return false;
          }
        }
      }
    }
    return true;
  }

  // Improved clustering: breadth-first grouping of points that lie within an
  // adaptive distance of one another.
  enhancedCluster(points, distanceThreshold) {
    const clusters = [];
    const visited = new Set();

    // Sort by probability, descending, so high-confidence points seed clusters.
    const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);

    for (let i = 0; i < sortedPoints.length; i++) {
      if (visited.has(i)) continue;

      const cluster = [];
      const queue = [i];
      visited.add(i);

      while (queue.length > 0) {
        const currentIndex = queue.shift();
        const currentPoint = sortedPoints[currentIndex];
        cluster.push(currentPoint);

        // Dynamically widen the search radius for low-confidence points.
        const adaptiveThreshold =
          distanceThreshold * (1 + (1 - currentPoint.prob) * 0.5);

        for (let j = 0; j < sortedPoints.length; j++) {
          if (visited.has(j)) continue;

          const targetPoint = sortedPoints[j];
          const dist = Math.hypot(
            targetPoint.x - currentPoint.x,
            targetPoint.y - currentPoint.y
          );

          if (dist < adaptiveThreshold) {
            queue.push(j);
            visited.add(j);
          }
        }
      }

      if (cluster.length > 0) {
        clusters.push(cluster);
      }
    }

    return clusters;
  }

  // Clamp box corners to the processed-image bounds. The probability map is
  // assumed to have the same resolution as the processed image, so no
  // rescaling is applied here.
  scaleBoxToProcessedImage(box, processedImage) {
    const { width: processedWidth, height: processedHeight } = processedImage;
    const clamp = (value, max) => Math.max(0, Math.min(max, value));

    return {
      x1: clamp(box.x1, processedWidth - 1),
      y1: clamp(box.y1, processedHeight - 1),
      x2: clamp(box.x2, processedWidth - 1),
      y2: clamp(box.y2, processedHeight - 1),
      x3: clamp(box.x3, processedWidth - 1),
      y3: clamp(box.y3, processedHeight - 1),
      x4: clamp(box.x4, processedWidth - 1),
      y4: clamp(box.y4, processedHeight - 1)
    };
  }

  // Map a box from processed-image coordinates back to the original image,
  // undoing the resize (scaleX/scaleY) and padding (paddingX/paddingY) applied
  // during preprocessing, then rebuild an axis-aligned quadrilateral from the
  // two mapped corners.
  scaleBoxToOriginalImage(box, processedImage) {
    const {
      scaleX, scaleY, paddingX, paddingY, originalWidth, originalHeight
    } = processedImage;

    const paddedX1 = box.x1 * scaleX;
    const paddedY1 = box.y1 * scaleY;
    const paddedX3 = box.x3 * scaleX;
    const paddedY3 = box.y3 * scaleY;

    const originalX1 = paddedX1 - paddingX;
    const originalY1 = paddedY1 - paddingY;
    const originalX3 = paddedX3 - paddingX;
    const originalY3 = paddedY3 - paddingY;

    const clamp = (value, max) => Math.max(0, Math.min(max, value));

    return {
      x1: clamp(originalX1, originalWidth - 1),
      y1: clamp(originalY1, originalHeight - 1),
      x2: clamp(originalX3, originalWidth - 1),
      y2: clamp(originalY1, originalHeight - 1),
      x3: clamp(originalX3, originalWidth - 1),
      y3: clamp(originalY3, originalHeight - 1),
      x4: clamp(originalX1, originalWidth - 1),
      y4: clamp(originalY3, originalHeight - 1),
      confidence: box.confidence
    };
  }
}

export default DetectionProcessor;
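// Minimal usage sketch. Assumptions (not part of this module): a detection
// model exported to ONNX at ./models/det.onnx, and a separate preprocessing
// helper that produces the { buffer, width, height, scaleX, scaleY, paddingX,
// paddingY, originalWidth, originalHeight } object this class expects.
//
//   import { InferenceSession } from 'onnxruntime-node';
//   import DetectionProcessor from './detectionProcessor.js';
//
//   const session = await InferenceSession.create('./models/det.onnx');
//   const processor = new DetectionProcessor();
//   processor.initialize(session, { detThresh: 0.05, detBoxThresh: 0.1 });
//
//   const processedImage = await preprocess('./sample.jpg'); // hypothetical helper
//   const boxes = await processor.detectText(processedImage);
//   for (const box of boxes) {
//     console.log(processor.scaleBoxToOriginalImage(box, processedImage));
//   }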