Electron-vue3-ts-offline/server/utils/detectionProcessor.js
2025-11-13 16:34:41 +08:00

271 行
9.0 KiB
JavaScript

// server/utils/detectionProcessor.js
import { Tensor } from 'onnxruntime-node';
import sharp from 'sharp';
class DetectionProcessor {
  /**
   * Runs a DB-style text-detection ONNX model and converts its probability
   * map into axis-aligned quadrilateral text boxes.
   *
   * Collaborators are injected via initialize(); construction has no side effects.
   */
  constructor() {
    this.session = null; // onnxruntime-node InferenceSession (set in initialize)
    this.config = null;  // detection thresholds: { detThresh, detBoxThresh, ... }
  }

  /**
   * Inject the ONNX inference session and detection configuration.
   * @param {object} session - onnxruntime-node InferenceSession.
   * @param {object} config - threshold config ({ detThresh, detBoxThresh }).
   */
  initialize(session, config) {
    this.session = session;
    this.config = config;
  }

  /**
   * Detect text regions in a preprocessed image.
   * Best-effort by design: any failure is logged and an empty list is
   * returned so the caller's OCR pipeline keeps running.
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<Array<object>>} quad boxes sorted by confidence (desc).
   */
  async detectText(processedImage) {
    try {
      const inputTensor = await this.prepareDetectionInput(processedImage);
      const outputs = await this.session.run({ [this.session.inputNames[0]]: inputTensor });
      return this.postprocessDetection(outputs, processedImage);
    } catch (error) {
      console.error('文本检测失败:', error);
      return [];
    }
  }

  /**
   * Convert the raw image buffer into a 1x3xHxW float32 NCHW tensor with
   * values scaled to [0, 1].
   *
   * Bug fix: the previous loop derived the channel index from the pixel
   * index (`Math.floor(pixelIndex / (height * width))`), which is always 0,
   * so only the R plane of the tensor was ever written (G and B stayed
   * zero) — and even that plane held only the red byte of each pixel. This
   * version performs a direct interleaved-HWC → planar-CHW conversion
   * filling all three planes.
   *
   * NOTE(review): no mean/std normalization is applied — confirm the model
   * was exported to expect plain [0,1] RGB input.
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<Tensor>} float32 tensor of shape [1, 3, height, width].
   */
  async prepareDetectionInput(processedImage) {
    const { buffer, width, height } = processedImage;
    const { data, info } = await sharp(buffer)
      .ensureAlpha() // guarantees a fixed 4-channel RGBA interleaved layout
      .raw()
      .toBuffer({ resolveWithObject: true });
    const channels = info.channels;
    const pixelCount = height * width;
    const inputData = new Float32Array(3 * pixelCount);
    // Interleaved HWC (RGBA) -> planar CHW (RGB), normalized to [0, 1].
    for (let p = 0; p < pixelCount; p++) {
      const src = p * channels;
      inputData[p] = data[src] / 255.0;                      // R plane
      inputData[pixelCount + p] = data[src + 1] / 255.0;     // G plane
      inputData[2 * pixelCount + p] = data[src + 2] / 255.0; // B plane
    }
    return new Tensor('float32', inputData, [1, 3, height, width]);
  }

  /**
   * Turn the model's probability map into text boxes:
   * threshold pixels -> cluster nearby points -> bounding quad per cluster
   * -> size/aspect/confidence filters -> clamp to image bounds.
   *
   * Assumes output dims are [batch, channels, height, width] and reads
   * channel 0 only (single-channel DB probability map).
   * NOTE(review): map coordinates are clamped to the processed image but
   * never rescaled — this is only correct if the model's output map has the
   * same resolution as its input; confirm for models that downsample.
   * @param {object} outputs - result of session.run().
   * @param {{width: number, height: number}} processedImage
   * @returns {Array<object>} quads with a `confidence` field, sorted desc.
   */
  postprocessDetection(outputs, processedImage) {
    try {
      const boxes = [];
      const detectionOutput = outputs[this.session.outputNames[0]];
      if (!detectionOutput) {
        return boxes;
      }
      const [, , height, width] = detectionOutput.dims;
      const data = detectionOutput.data;
      // Low pixel threshold favors recall; clusters are filtered below.
      const threshold = this.config.detThresh || 0.05;
      const points = [];
      for (let y = 0; y < height; y++) {
        for (let x = 0; x < width; x++) {
          const prob = data[y * width + x];
          if (prob > threshold) {
            points.push({
              x,
              y,
              prob,
              // Computed but not consumed downstream — kept for callers/debugging.
              localMax: this.isLocalMaximum(data, x, y, width, height, 2)
            });
          }
        }
      }
      if (points.length === 0) {
        return boxes;
      }
      const clusters = this.enhancedCluster(points, 8);
      for (const cluster of clusters) {
        if (cluster.length < 2) continue; // isolated pixels are noise
        const xs = cluster.map(p => p.x);
        const ys = cluster.map(p => p.y);
        const minX = Math.min(...xs);
        const maxX = Math.max(...xs);
        const minY = Math.min(...ys);
        const maxY = Math.max(...ys);
        const boxWidth = maxX - minX;
        const boxHeight = maxY - minY;
        if (boxWidth < 2 || boxHeight < 2) continue;
        // Very generous aspect limits: only reject degenerate slivers.
        const aspectRatio = boxWidth / boxHeight;
        if (aspectRatio > 100 || aspectRatio < 0.01) continue;
        const avgConfidence = cluster.reduce((sum, p) => sum + p.prob, 0) / cluster.length;
        const boxThreshold = this.config.detBoxThresh || 0.1;
        if (avgConfidence > boxThreshold) {
          // Quad corners in clockwise order: TL, TR, BR, BL.
          const box = this.scaleBoxToProcessedImage({
            x1: minX, y1: minY,
            x2: maxX, y2: minY,
            x3: maxX, y3: maxY,
            x4: minX, y4: maxY
          }, processedImage);
          box.confidence = avgConfidence;
          boxes.push(box);
        }
      }
      boxes.sort((a, b) => b.confidence - a.confidence);
      console.log(`✅ 检测到 ${boxes.length} 个文本区域`);
      return boxes;
    } catch (error) {
      console.error('检测后处理错误:', error);
      return [];
    }
  }

  /**
   * True when no neighbor within `radius` has a strictly greater probability
   * (ties count as a maximum). Out-of-bounds neighbors are ignored.
   * @param {ArrayLike<number>} data - flattened probability map, row-major.
   * @param {number} x
   * @param {number} y
   * @param {number} width
   * @param {number} height
   * @param {number} radius - Chebyshev neighborhood radius.
   * @returns {boolean}
   */
  isLocalMaximum(data, x, y, width, height, radius) {
    const centerProb = data[y * width + x];
    for (let dy = -radius; dy <= radius; dy++) {
      for (let dx = -radius; dx <= radius; dx++) {
        if (dx === 0 && dy === 0) continue;
        const nx = x + dx;
        const ny = y + dy;
        if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
          if (data[ny * width + nx] > centerProb) {
            return false;
          }
        }
      }
    }
    return true;
  }

  /**
   * Greedy BFS clustering: points are visited in descending probability so
   * high-confidence points seed clusters; a point joins when within an
   * adaptive radius of any member (radius grows up to 1.5x for
   * low-probability members). O(n^2) in the number of thresholded points.
   * @param {Array<{x:number,y:number,prob:number}>} points
   * @param {number} distanceThreshold - base Euclidean radius.
   * @returns {Array<Array<object>>} clusters of the input points.
   */
  enhancedCluster(points, distanceThreshold) {
    const clusters = [];
    const visited = new Set();
    // Process high-confidence points first so they anchor clusters.
    const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);
    for (let i = 0; i < sortedPoints.length; i++) {
      if (visited.has(i)) continue;
      const cluster = [];
      const queue = [i];
      visited.add(i);
      while (queue.length > 0) {
        const currentIndex = queue.shift();
        const currentPoint = sortedPoints[currentIndex];
        cluster.push(currentPoint);
        // Weak points search slightly farther (up to 1.5x the base radius).
        const adaptiveThreshold = distanceThreshold *
          (1 + (1 - currentPoint.prob) * 0.5);
        for (let j = 0; j < sortedPoints.length; j++) {
          if (visited.has(j)) continue;
          const targetPoint = sortedPoints[j];
          const dist = Math.hypot(
            targetPoint.x - currentPoint.x,
            targetPoint.y - currentPoint.y
          );
          if (dist < adaptiveThreshold) {
            queue.push(j);
            visited.add(j);
          }
        }
      }
      if (cluster.length > 0) {
        clusters.push(cluster);
      }
    }
    return clusters;
  }

  /**
   * Clamp a quad's corners into [0, width-1] x [0, height-1] of the
   * processed image. Despite the name, no rescaling is performed — the
   * coordinates are assumed already to be in processed-image space.
   * @param {object} box - quad with x1..x4 / y1..y4.
   * @param {{width: number, height: number}} processedImage
   * @returns {object} new clamped quad (input is not mutated).
   */
  scaleBoxToProcessedImage(box, processedImage) {
    const { width: processedWidth, height: processedHeight } = processedImage;
    const clamp = (value, max) => Math.max(0, Math.min(max, value));
    return {
      x1: clamp(box.x1, processedWidth - 1),
      y1: clamp(box.y1, processedHeight - 1),
      x2: clamp(box.x2, processedWidth - 1),
      y2: clamp(box.y2, processedHeight - 1),
      x3: clamp(box.x3, processedWidth - 1),
      y3: clamp(box.y3, processedHeight - 1),
      x4: clamp(box.x4, processedWidth - 1),
      y4: clamp(box.y4, processedHeight - 1)
    };
  }

  /**
   * Map a quad from processed-image space back to original-image space:
   * scale by (scaleX, scaleY), subtract the letterbox padding, then clamp.
   * Only the TL (x1,y1) and BR (x3,y3) corners are used; the result is a
   * re-derived axis-aligned quad.
   * NOTE(review): assumes scaleX/scaleY convert processed -> padded
   * coordinates and paddingX/paddingY are in original-image pixels — verify
   * against the preprocessing step that builds `processedImage`.
   * @param {object} box - quad with x1..x4 / y1..y4 and confidence.
   * @param {object} processedImage - carries scale/padding/original dims.
   * @returns {object} clamped quad in original-image space with confidence.
   */
  scaleBoxToOriginalImage(box, processedImage) {
    const {
      scaleX, scaleY,
      paddingX, paddingY,
      originalWidth, originalHeight
    } = processedImage;
    const originalX1 = box.x1 * scaleX - paddingX;
    const originalY1 = box.y1 * scaleY - paddingY;
    const originalX3 = box.x3 * scaleX - paddingX;
    const originalY3 = box.y3 * scaleY - paddingY;
    const clamp = (value, max) => Math.max(0, Math.min(max, value));
    return {
      x1: clamp(originalX1, originalWidth - 1),
      y1: clamp(originalY1, originalHeight - 1),
      x2: clamp(originalX3, originalWidth - 1),
      y2: clamp(originalY1, originalHeight - 1),
      x3: clamp(originalX3, originalWidth - 1),
      y3: clamp(originalY3, originalHeight - 1),
      x4: clamp(originalX1, originalWidth - 1),
      y4: clamp(originalY3, originalHeight - 1),
      confidence: box.confidence
    };
  }
}
export default DetectionProcessor;