// server/utils/detectionProcessor.js
import { Tensor } from 'onnxruntime-node';
import sharp from 'sharp';

class DetectionProcessor {
  constructor() {
    this.session = null;
    this.config = null;
    // Simple console-backed logger tagged with [detection]
    this.logger = {
      info: (msg, ...args) => console.log(`🔍 [detection] ${msg}`, ...args),
      error: (msg, ...args) => console.error(`❌ [detection] ${msg}`, ...args),
      debug: (msg, ...args) => console.log(`🐛 [detection] ${msg}`, ...args)
    };
  }
  // Store the ONNX Runtime session and detection config supplied by the caller
  initialize(session, config) {
    this.session = session;
    this.config = config;
    this.logger.info('Detection processor initialized');
  }
  // Full detection pass: build the input tensor, run inference, post-process the output
  async detectText(processedImage) {
    const startTime = Date.now();
    this.logger.info('Starting text detection');
    try {
      const inputTensor = await this.prepareDetectionInput(processedImage);
      const outputs = await this.session.run({ [this.session.inputNames[0]]: inputTensor });
      const textBoxes = this.postprocessDetection(outputs, processedImage);
      const processingTime = Date.now() - startTime;
      this.logger.info(`Detection finished: ${textBoxes.length} regions in ${processingTime}ms`);
      return textBoxes;
    } catch (error) {
      this.logger.error('Detection failed', error);
      return [];
    }
  }
  // Decode the image to raw pixels and convert interleaved HWC data into a
  // planar CHW float32 tensor normalized to [0, 1]
  async prepareDetectionInput(processedImage) {
    const { buffer, width, height } = processedImage;
    this.logger.debug(`Preparing detection input: ${width}x${height}`);
    const imageData = await sharp(buffer)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });
    const inputData = new Float32Array(3 * height * width);
    const data = imageData.data;
    const channels = imageData.info.channels;
    for (let i = 0; i < data.length; i += channels) {
      const pixelIndex = Math.floor(i / channels);
      const y = Math.floor(pixelIndex / width);
      const x = pixelIndex % width;
      for (let c = 0; c < 3; c++) {
        const inputIndex = c * height * width + y * width + x;
        if (inputIndex < inputData.length) {
          // Read the c-th channel of the pixel, not the red value for every plane
          inputData[inputIndex] = data[i + c] / 255.0;
        }
      }
    }
    this.logger.debug('Detection input tensor ready');
    return new Tensor('float32', inputData, [1, 3, height, width]);
  }
  // Turn the raw probability map into scored quadrilateral text boxes
  postprocessDetection(outputs, processedImage) {
    this.logger.debug('Starting detection post-processing');
    try {
      const boxes = [];
      const outputNames = this.session.outputNames;
      const detectionOutput = outputs[outputNames[0]];
      if (!detectionOutput) {
        this.logger.debug('Detection output is empty');
        return boxes;
      }
      const [batch, channels, height, width] = detectionOutput.dims;
      const data = detectionOutput.data;
      // Dynamic threshold adjustment
      const baseThreshold = this.config.detThresh || 0.05;
      const adaptiveThreshold = this.calculateAdaptiveThreshold(data, baseThreshold);
      this.logger.debug(`Using detection threshold: ${adaptiveThreshold.toFixed(4)}`);
      const points = this.collectDetectionPoints(data, width, height, adaptiveThreshold);
      if (points.length === 0) {
        this.logger.debug('No valid text points detected');
        return boxes;
      }
      this.logger.debug(`Collected ${points.length} detection points`);
      const clusters = this.enhancedCluster(points, this.config.clusterDistance || 8);
      this.logger.debug(`Clustering produced ${clusters.length} regions`);
      const validBoxes = this.filterAndScaleBoxes(clusters, processedImage);
      this.logger.info(`Generated ${validBoxes.length} valid text boxes`);
      return validBoxes.sort((a, b) => b.confidence - a.confidence);
    } catch (error) {
      this.logger.error('Detection post-processing error', error);
      return [];
    }
  }
  // Collect every probability-map pixel above the threshold, recording whether
  // each one is a local maximum within a small neighborhood
  collectDetectionPoints(data, width, height, threshold) {
    const points = [];
    let totalProb = 0;
    let maxProb = 0;
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;
        const prob = data[idx];
        if (prob > threshold) {
          totalProb += prob;
          maxProb = Math.max(maxProb, prob);
          points.push({
            x, y, prob,
            localMax: this.isLocalMaximum(data, x, y, width, height, 2)
          });
        }
      }
    }
    if (points.length > 0) {
      this.logger.debug(`Detection point stats: mean confidence ${(totalProb / points.length).toFixed(4)}, max confidence ${maxProb.toFixed(4)}`);
    }
    return points;
  }
  // Adjust the detection threshold dynamically based on a random sample of the probability map
  calculateAdaptiveThreshold(data, baseThreshold) {
    let sum = 0;
    let count = 0;
    const sampleSize = Math.min(1000, data.length);
    for (let i = 0; i < sampleSize; i++) {
      const idx = Math.floor(Math.random() * data.length);
      if (data[idx] > baseThreshold) {
        sum += data[idx];
        count++;
      }
    }
    if (count === 0) return baseThreshold;
    const mean = sum / count;
    return Math.min(baseThreshold * 1.5, mean * 0.8);
  }
  // Convert point clusters into axis-aligned quads, filtering by size,
  // aspect ratio, and average confidence
  filterAndScaleBoxes(clusters, processedImage) {
    const boxes = [];
    const minPoints = this.config.minClusterPoints || 2;
    const boxThreshold = this.config.detBoxThresh || 0.1;
    for (const cluster of clusters) {
      if (cluster.length < minPoints) continue;
      const minX = Math.min(...cluster.map(p => p.x));
      const maxX = Math.max(...cluster.map(p => p.x));
      const minY = Math.min(...cluster.map(p => p.y));
      const maxY = Math.max(...cluster.map(p => p.y));
      const boxWidth = maxX - minX;
      const boxHeight = maxY - minY;
      // Relaxed size limits to keep small text regions
      if (boxWidth < 1 || boxHeight < 1) continue;
      const aspectRatio = boxWidth / boxHeight;
      if (aspectRatio > 150 || aspectRatio < 0.005) continue;
      const avgConfidence = cluster.reduce((sum, p) => sum + p.prob, 0) / cluster.length;
      if (avgConfidence > boxThreshold) {
        const box = this.scaleBoxToProcessedImage({
          x1: minX, y1: minY,
          x2: maxX, y2: minY,
          x3: maxX, y3: maxY,
          x4: minX, y4: maxY
        }, processedImage);
        box.confidence = avgConfidence;
        boxes.push(box);
      }
    }
    return boxes;
  }
  isLocalMaximum(data, x, y, width, height, radius) {
    const centerProb = data[y * width + x];
    for (let dy = -radius; dy <= radius; dy++) {
      for (let dx = -radius; dx <= radius; dx++) {
        if (dx === 0 && dy === 0) continue;
        const nx = x + dx;
        const ny = y + dy;
        if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
          if (data[ny * width + nx] > centerProb) {
            return false;
          }
        }
      }
    }
    return true;
  }
  // Greedy BFS clustering over detection points, seeded from the highest-confidence points
  enhancedCluster(points, distanceThreshold) {
    const clusters = [];
    const visited = new Set();
    const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);
    for (let i = 0; i < sortedPoints.length; i++) {
      if (visited.has(i)) continue;
      const cluster = [];
      const queue = [i];
      visited.add(i);
      while (queue.length > 0) {
        const currentIndex = queue.shift();
        const currentPoint = sortedPoints[currentIndex];
        cluster.push(currentPoint);
        // Widen the search radius for lower-confidence points
        const adaptiveThreshold = distanceThreshold * (1 + (1 - currentPoint.prob) * 0.3);
        for (let j = 0; j < sortedPoints.length; j++) {
          if (visited.has(j)) continue;
          const targetPoint = sortedPoints[j];
          const dist = Math.sqrt(
            Math.pow(targetPoint.x - currentPoint.x, 2) +
            Math.pow(targetPoint.y - currentPoint.y, 2)
          );
          if (dist < adaptiveThreshold) {
            queue.push(j);
            visited.add(j);
          }
        }
      }
      if (cluster.length > 0) {
        clusters.push(cluster);
      }
    }
    return clusters;
  }
  // Clamp each box corner to the bounds of the processed image
  scaleBoxToProcessedImage(box, processedImage) {
    const { width: processedWidth, height: processedHeight } = processedImage;
    const clamp = (value, max) => Math.max(0, Math.min(max, value));
    return {
      x1: clamp(box.x1, processedWidth - 1),
      y1: clamp(box.y1, processedHeight - 1),
      x2: clamp(box.x2, processedWidth - 1),
      y2: clamp(box.y2, processedHeight - 1),
      x3: clamp(box.x3, processedWidth - 1),
      y3: clamp(box.y3, processedHeight - 1),
      x4: clamp(box.x4, processedWidth - 1),
      y4: clamp(box.y4, processedHeight - 1)
    };
  }
}

export default DetectionProcessor;
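
// A minimal usage sketch, kept as a comment. The model path and the shape of
// `processedImage` are assumptions for illustration, not part of this module;
// the config keys (detThresh, detBoxThresh, clusterDistance) are the ones the
// class reads above.
//
//   import { InferenceSession } from 'onnxruntime-node';
//   import DetectionProcessor from './detectionProcessor.js';
//
//   const session = await InferenceSession.create('./models/det_model.onnx'); // hypothetical path
//   const detector = new DetectionProcessor();
//   detector.initialize(session, { detThresh: 0.05, detBoxThresh: 0.1, clusterDistance: 8 });
//   // processedImage: { buffer, width, height }, where buffer is any image sharp can decode
//   const boxes = await detector.detectText(processedImage);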