// server/utils/detectionProcessor.js
import { Tensor } from 'onnxruntime-node';
import sharp from 'sharp';
/**
 * Runs an ONNX DB-style text-detection model over a preprocessed image and
 * converts the output probability map into axis-aligned quadrilateral text
 * boxes with confidence scores.
 *
 * Usage: construct, then call initialize(session, config) before detectText().
 */
class DetectionProcessor {
  constructor() {
    // ONNX inference session; injected via initialize().
    this.session = null;
    // Detection options ({ detThresh, detBoxThresh }); injected via initialize().
    this.config = null;
  }

  /**
   * Injects the inference session and detection configuration.
   * @param {object} session - onnxruntime InferenceSession for the detection model.
   * @param {object} config - Options: detThresh (per-pixel threshold),
   *   detBoxThresh (per-box average-confidence threshold).
   */
  initialize(session, config) {
    this.session = session;
    this.config = config;
  }

  /**
   * Detects text regions in a preprocessed image.
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<Array<object>>} Quad boxes sorted by confidence; [] on failure.
   */
  async detectText(processedImage) {
    try {
      const inputTensor = await this.prepareDetectionInput(processedImage);
      const outputs = await this.session.run({ [this.session.inputNames[0]]: inputTensor });
      return this.postprocessDetection(outputs, processedImage);
    } catch (error) {
      console.error('文本检测失败:', error);
      return [];
    }
  }

  /**
   * Converts the image buffer into a normalized NCHW float tensor [1, 3, H, W].
   *
   * Bug fix: the previous version derived the channel index from the pixel
   * index, which is always 0 for interleaved data — only the red plane of the
   * tensor was ever populated (G and B stayed zero). All three RGB planes are
   * now filled from sharp's interleaved output; alpha is dropped.
   *
   * NOTE(review): assumes processedImage.width/height match the decoded
   * buffer's dimensions — confirm against the preprocessing step.
   *
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<Tensor>} float32 tensor scaled to [0, 1].
   */
  async prepareDetectionInput(processedImage) {
    const { buffer, width, height } = processedImage;

    // Decode to raw interleaved pixels; ensureAlpha() guarantees >= 4 channels.
    const { data, info } = await sharp(buffer)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const channels = info.channels;
    const planeSize = height * width;
    const inputData = new Float32Array(3 * planeSize);

    // Interleaved (HWC) -> planar (CHW), each value scaled to [0, 1].
    for (let p = 0; p < planeSize; p++) {
      const src = p * channels;
      inputData[p] = data[src] / 255.0;                     // R plane
      inputData[planeSize + p] = data[src + 1] / 255.0;     // G plane
      inputData[2 * planeSize + p] = data[src + 2] / 255.0; // B plane
    }

    return new Tensor('float32', inputData, [1, 3, height, width]);
  }

  /**
   * Converts the model's probability map into text boxes: threshold pixels,
   * cluster them, and emit one clamped bounding quad per plausible cluster.
   * @param {object} outputs - Map of output name -> tensor from session.run().
   * @param {{width: number, height: number}} processedImage
   * @returns {Array<object>} Boxes sorted by descending confidence; [] on error.
   */
  postprocessDetection(outputs, processedImage) {
    try {
      const boxes = [];
      const detectionOutput = outputs[this.session.outputNames[0]];
      if (!detectionOutput) {
        return boxes;
      }

      const [, , height, width] = detectionOutput.dims;
      const data = detectionOutput.data;

      // Low pixel threshold to favor recall; `??` keeps an explicit 0 usable
      // (the old `||` silently replaced 0 with the default).
      const threshold = this.config.detThresh ?? 0.05;

      // Collect every map pixel above the threshold.
      const points = [];
      for (let y = 0; y < height; y++) {
        const row = y * width;
        for (let x = 0; x < width; x++) {
          const prob = data[row + x];
          if (prob > threshold) {
            points.push({
              x,
              y,
              prob,
              // NOTE(review): localMax is computed but not consumed downstream —
              // kept for compatibility; consider removing if it stays unused.
              localMax: this.isLocalMaximum(data, x, y, width, height, 2)
            });
          }
        }
      }

      if (points.length === 0) {
        return boxes;
      }

      const clusters = this.enhancedCluster(points, 8);
      const boxThreshold = this.config.detBoxThresh ?? 0.1;

      for (const cluster of clusters) {
        // Require at least two supporting points per box.
        if (cluster.length < 2) continue;

        // Bounding box + confidence in one pass (no spread: avoids the
        // engine argument-count limit on very large clusters).
        let minX = Infinity;
        let maxX = -Infinity;
        let minY = Infinity;
        let maxY = -Infinity;
        let probSum = 0;
        for (const p of cluster) {
          if (p.x < minX) minX = p.x;
          if (p.x > maxX) maxX = p.x;
          if (p.y < minY) minY = p.y;
          if (p.y > maxY) maxY = p.y;
          probSum += p.prob;
        }

        const boxWidth = maxX - minX;
        const boxHeight = maxY - minY;

        // Reject degenerate boxes (relaxed size limits for recall).
        if (boxWidth < 2 || boxHeight < 2) continue;

        // Reject extreme aspect ratios (relaxed limits for recall).
        const aspectRatio = boxWidth / boxHeight;
        if (aspectRatio > 100 || aspectRatio < 0.01) continue;

        const avgConfidence = probSum / cluster.length;
        if (avgConfidence > boxThreshold) {
          const box = this.scaleBoxToProcessedImage({
            x1: minX, y1: minY,
            x2: maxX, y2: minY,
            x3: maxX, y3: maxY,
            x4: minX, y4: maxY
          }, processedImage);
          box.confidence = avgConfidence;
          boxes.push(box);
        }
      }

      boxes.sort((a, b) => b.confidence - a.confidence);
      console.log(`✅ 检测到 ${boxes.length} 个文本区域`);
      return boxes;

    } catch (error) {
      console.error('检测后处理错误:', error);
      return [];
    }
  }

  /**
   * Returns true when data[y*width+x] is >= every neighbor within `radius`
   * (Chebyshev distance), i.e. a local maximum of the probability map.
   * @param {Float32Array|Array<number>} data - Row-major probability map.
   * @param {number} radius - Neighborhood radius in map pixels.
   * @returns {boolean}
   */
  isLocalMaximum(data, x, y, width, height, radius) {
    const centerProb = data[y * width + x];
    for (let dy = -radius; dy <= radius; dy++) {
      for (let dx = -radius; dx <= radius; dx++) {
        if (dx === 0 && dy === 0) continue;
        const nx = x + dx;
        const ny = y + dy;
        if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
          if (data[ny * width + nx] > centerProb) {
            return false;
          }
        }
      }
    }
    return true;
  }

  /**
   * Greedy BFS clustering of probability-map points. The neighbor search
   * radius expands (up to 1.5x the base threshold) around low-confidence
   * points so weak strokes can bridge gaps.
   * @param {Array<{x:number, y:number, prob:number}>} points
   * @param {number} distanceThreshold - Base neighbor distance in map pixels.
   * @returns {Array<Array<object>>} Clusters; every input point lands in one.
   */
  enhancedCluster(points, distanceThreshold) {
    const clusters = [];
    const visited = new Set();

    // Seed clusters from high-confidence points first.
    const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);

    for (let i = 0; i < sortedPoints.length; i++) {
      if (visited.has(i)) continue;

      const cluster = [];
      const queue = [i];
      visited.add(i);
      let head = 0; // index-based head: O(1) dequeue vs O(n) Array#shift()

      while (head < queue.length) {
        const currentPoint = sortedPoints[queue[head++]];
        cluster.push(currentPoint);

        // Widen the search radius for weaker points.
        const adaptiveThreshold = distanceThreshold *
          (1 + (1 - currentPoint.prob) * 0.5);

        for (let j = 0; j < sortedPoints.length; j++) {
          if (visited.has(j)) continue;

          const targetPoint = sortedPoints[j];
          const dist = Math.hypot(
            targetPoint.x - currentPoint.x,
            targetPoint.y - currentPoint.y
          );

          if (dist < adaptiveThreshold) {
            queue.push(j);
            visited.add(j);
          }
        }
      }

      // The seed point is always present, so the cluster is never empty.
      clusters.push(cluster);
    }

    return clusters;
  }

  /**
   * Clamps a quad's corners to the processed-image bounds.
   * NOTE(review): despite the name, no scaling is applied — map coordinates
   * appear to already be in processed-image space; confirm the detection map
   * is emitted at full input resolution.
   * @param {object} box - Quad {x1..x4, y1..y4} in map coordinates.
   * @param {{width: number, height: number}} processedImage
   * @returns {object} New quad clamped to [0, dim-1].
   */
  scaleBoxToProcessedImage(box, processedImage) {
    const { width: processedWidth, height: processedHeight } = processedImage;

    const clamp = (value, max) => Math.max(0, Math.min(max, value));

    return {
      x1: clamp(box.x1, processedWidth - 1),
      y1: clamp(box.y1, processedHeight - 1),
      x2: clamp(box.x2, processedWidth - 1),
      y2: clamp(box.y2, processedHeight - 1),
      x3: clamp(box.x3, processedWidth - 1),
      y3: clamp(box.y3, processedHeight - 1),
      x4: clamp(box.x4, processedWidth - 1),
      y4: clamp(box.y4, processedHeight - 1)
    };
  }

  /**
   * Maps a box from processed-image space back to original-image space by
   * undoing the preprocessing scale and padding, then clamping to the
   * original bounds. Only the (x1,y1) and (x3,y3) diagonal is transformed;
   * the quad is rebuilt axis-aligned from those two corners.
   * @param {object} box - Quad with x1/y1, x3/y3 and confidence.
   * @param {object} processedImage - Provides scaleX/Y, paddingX/Y,
   *   originalWidth/Height recorded during preprocessing.
   * @returns {object} Axis-aligned quad in original-image coordinates.
   */
  scaleBoxToOriginalImage(box, processedImage) {
    const {
      scaleX, scaleY,
      paddingX, paddingY,
      originalWidth, originalHeight
    } = processedImage;

    // Undo resize, then undo padding offset.
    const originalX1 = box.x1 * scaleX - paddingX;
    const originalY1 = box.y1 * scaleY - paddingY;
    const originalX3 = box.x3 * scaleX - paddingX;
    const originalY3 = box.y3 * scaleY - paddingY;

    const clamp = (value, max) => Math.max(0, Math.min(max, value));

    return {
      x1: clamp(originalX1, originalWidth - 1),
      y1: clamp(originalY1, originalHeight - 1),
      x2: clamp(originalX3, originalWidth - 1),
      y2: clamp(originalY1, originalHeight - 1),
      x3: clamp(originalX3, originalWidth - 1),
      y3: clamp(originalY3, originalHeight - 1),
      x4: clamp(originalX1, originalWidth - 1),
      y4: clamp(originalY3, originalHeight - 1),
      confidence: box.confidence
    };
  }
}

export default DetectionProcessor;