Electron-vue3-ts-offline/server/utils/detectionProcessor.js
2025-11-13 18:09:31 +08:00

281 行
9.6 KiB
JavaScript

// server/utils/detectionProcessor.js
import { Tensor } from 'onnxruntime-node';
import sharp from 'sharp';
/**
 * Text-detection stage: feeds an image to an ONNX session and converts the
 * first output map into axis-aligned text boxes.
 *
 * Pipeline: `prepareDetectionInput` -> `session.run` -> `postprocessDetection`
 * (threshold -> collect points -> cluster -> filter/clamp boxes).
 *
 * NOTE(review): box coordinates are produced in the output map's pixel grid and
 * are only clamped (not rescaled) to the processed image, so this presumably
 * assumes the model's output map has the same H x W as its input — confirm.
 */
class DetectionProcessor {
  constructor() {
    this.session = null;
    this.config = null;
    this.logger = {
      info: (msg, ...args) => console.log(`🔍 [检测] ${msg}`, ...args),
      error: (msg, ...args) => console.error(`❌ [检测] ${msg}`, ...args),
      debug: (msg, ...args) => console.log(`🐛 [检测] ${msg}`, ...args),
    };
  }

  /**
   * Stores the inference session and the configuration used by later calls.
   *
   * @param {object} session - Object exposing `inputNames`, `outputNames` and `run()`.
   * @param {object} config - Optional tuning keys read by this class:
   *   `detThresh`, `detBoxThresh`, `clusterDistance`, `minClusterPoints`.
   */
  initialize(session, config) {
    this.session = session;
    this.config = config;
    this.logger.info('检测处理器初始化完成');
  }

  /**
   * Runs detection end to end.
   *
   * Best-effort by design: any failure is logged and an empty list is returned
   * instead of propagating the error.
   *
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<object[]>} Boxes sorted by descending confidence.
   */
  async detectText(processedImage) {
    const startTime = Date.now();
    this.logger.info('开始文本检测');
    try {
      const inputTensor = await this.prepareDetectionInput(processedImage);
      const outputs = await this.session.run({ [this.session.inputNames[0]]: inputTensor });
      const textBoxes = this.postprocessDetection(outputs, processedImage);
      const processingTime = Date.now() - startTime;
      this.logger.info(`检测完成: ${textBoxes.length}个区域, 耗时${processingTime}ms`);
      return textBoxes;
    } catch (error) {
      this.logger.error('检测失败', error);
      return [];
    }
  }

  /**
   * Decodes the image to raw pixels and packs it into a planar
   * `[1, 3, height, width]` float32 tensor with samples divided by 255.
   *
   * Each plane receives its own colour sample (`data[src + c]`). When the
   * decoded image has fewer than three channels, the first sample is replicated
   * into all three planes. Pixels are addressed with the decoded image's real
   * row stride and copied only inside the overlap of the declared and decoded
   * sizes, so one plane can never spill into the next; uncovered cells stay 0.
   *
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<Tensor>}
   */
  async prepareDetectionInput(processedImage) {
    const { buffer, width, height } = processedImage;
    this.logger.debug(`准备检测输入: ${width}x${height}`);

    const { data, info } = await sharp(buffer)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { channels, width: srcWidth, height: srcHeight } = info;
    if (srcWidth !== width || srcHeight !== height) {
      this.logger.debug(`输入尺寸不一致: 声明 ${width}x${height}, 实际 ${srcWidth}x${srcHeight}`);
    }

    const planeSize = height * width;
    const inputData = new Float32Array(3 * planeSize);
    const copyWidth = Math.min(width, srcWidth);
    const copyHeight = Math.min(height, srcHeight);
    // With fewer than 3 channels there is no per-channel colour to read.
    const hasColour = channels >= 3;

    for (let y = 0; y < copyHeight; y++) {
      for (let x = 0; x < copyWidth; x++) {
        const src = (y * srcWidth + x) * channels;
        const dst = y * width + x;
        for (let c = 0; c < 3; c++) {
          inputData[c * planeSize + dst] = data[src + (hasColour ? c : 0)] / 255.0;
        }
      }
    }

    this.logger.debug('检测输入张量准备完成');
    return new Tensor('float32', inputData, [1, 3, height, width]);
  }

  /**
   * Turns the session's first output into text boxes.
   *
   * The map size is taken from the last two entries of `dims`, so 4-D
   * `[N, C, H, W]`, 3-D `[N, H, W]` and 2-D `[H, W]` outputs are all accepted.
   * Only the first `H * W` values of `data` are read.
   *
   * @param {Object<string, {dims: number[], data: ArrayLike<number>}>} outputs
   * @param {{width: number, height: number}} processedImage
   * @returns {object[]} Boxes sorted by descending confidence; `[]` on any error.
   */
  postprocessDetection(outputs, processedImage) {
    this.logger.debug('开始检测后处理');
    try {
      const detectionOutput = outputs[this.session.outputNames[0]];
      if (!detectionOutput) {
        this.logger.debug('检测输出为空');
        return [];
      }

      const [height, width] = detectionOutput.dims.slice(-2);
      const data = detectionOutput.data;

      // Adapt the threshold to the values actually present in this map.
      const baseThreshold = this.config.detThresh || 0.05;
      const adaptiveThreshold = this.calculateAdaptiveThreshold(data, baseThreshold);
      this.logger.debug(`使用检测阈值: ${adaptiveThreshold.toFixed(4)}`);

      const points = this.collectDetectionPoints(data, width, height, adaptiveThreshold);
      if (points.length === 0) {
        this.logger.debug('未检测到有效文本点');
        return [];
      }
      this.logger.debug(`收集到 ${points.length} 个检测点`);

      const clusters = this.enhancedCluster(points, this.config.clusterDistance || 8);
      this.logger.debug(`聚类得到 ${clusters.length} 个区域`);

      const validBoxes = this.filterAndScaleBoxes(clusters, processedImage);
      this.logger.info(`生成 ${validBoxes.length} 个有效文本框`);
      return validBoxes.sort((a, b) => b.confidence - a.confidence);
    } catch (error) {
      this.logger.error('检测后处理错误', error);
      return [];
    }
  }

  /**
   * Collects every cell of the `width x height` map whose value exceeds
   * `threshold`, in row-major order.
   *
   * Each point also carries `localMax` (result of `isLocalMaximum` with radius
   * 2); nothing else in this class reads that flag.
   *
   * @param {ArrayLike<number>} data - Row-major map values.
   * @param {number} width
   * @param {number} height
   * @param {number} threshold - Exclusive lower bound.
   * @returns {{x: number, y: number, prob: number, localMax: boolean}[]}
   */
  collectDetectionPoints(data, width, height, threshold) {
    const points = [];
    let totalProb = 0;
    let maxProb = 0;
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const prob = data[y * width + x];
        if (prob > threshold) {
          totalProb += prob;
          maxProb = Math.max(maxProb, prob);
          points.push({
            x,
            y,
            prob,
            localMax: this.isLocalMaximum(data, x, y, width, height, 2),
          });
        }
      }
    }
    if (points.length > 0) {
      this.logger.debug(`检测点统计: 平均置信度 ${(totalProb / points.length).toFixed(4)}, 最大置信度 ${maxProb.toFixed(4)}`);
    }
    return points;
  }

  /**
   * Derives the working threshold from the values above `baseThreshold`.
   *
   * Returns `min(baseThreshold * 1.5, mean * 0.8)` where `mean` is the average
   * of all values strictly greater than `baseThreshold`, or `baseThreshold`
   * itself when no value qualifies. The whole array is scanned (rather than a
   * random sample) so the same input always yields the same threshold.
   *
   * @param {ArrayLike<number>} data
   * @param {number} baseThreshold
   * @returns {number}
   */
  calculateAdaptiveThreshold(data, baseThreshold) {
    let sum = 0;
    let count = 0;
    for (let i = 0; i < data.length; i++) {
      const value = data[i];
      if (value > baseThreshold) {
        sum += value;
        count++;
      }
    }
    if (count === 0) return baseThreshold;
    const mean = sum / count;
    return Math.min(baseThreshold * 1.5, mean * 0.8);
  }

  /**
   * Converts clusters into boxes, dropping clusters that are too small, too
   * thin, too elongated, or whose mean value is not above the box threshold.
   *
   * Bounds and the confidence sum are gathered in one pass; spreading a large
   * cluster into `Math.min(...)` would exceed the engine's argument limit.
   *
   * @param {{x: number, y: number, prob: number}[][]} clusters
   * @param {{width: number, height: number}} processedImage
   * @returns {object[]} Boxes with corners `x1..y4` (clockwise from top-left)
   *   and `confidence` (mean `prob` of the cluster).
   */
  filterAndScaleBoxes(clusters, processedImage) {
    const boxes = [];
    const minPoints = this.config.minClusterPoints || 2;
    const boxThreshold = this.config.detBoxThresh || 0.1;

    for (const cluster of clusters) {
      if (cluster.length < minPoints) continue;

      let minX = Infinity;
      let maxX = -Infinity;
      let minY = Infinity;
      let maxY = -Infinity;
      let probSum = 0;
      for (const point of cluster) {
        if (point.x < minX) minX = point.x;
        if (point.x > maxX) maxX = point.x;
        if (point.y < minY) minY = point.y;
        if (point.y > maxY) maxY = point.y;
        probSum += point.prob;
      }

      const boxWidth = maxX - minX;
      const boxHeight = maxY - minY;
      // Deliberately permissive size limits so small text is kept.
      if (boxWidth < 1 || boxHeight < 1) continue;
      const aspectRatio = boxWidth / boxHeight;
      if (aspectRatio > 150 || aspectRatio < 0.005) continue;

      const avgConfidence = probSum / cluster.length;
      if (avgConfidence > boxThreshold) {
        const box = this.scaleBoxToProcessedImage({
          x1: minX, y1: minY,
          x2: maxX, y2: minY,
          x3: maxX, y3: maxY,
          x4: minX, y4: maxY,
        }, processedImage);
        box.confidence = avgConfidence;
        boxes.push(box);
      }
    }
    return boxes;
  }

  /**
   * Reports whether no in-bounds neighbour within the square window of the
   * given `radius` holds a strictly greater value than the centre cell.
   *
   * @returns {boolean}
   */
  isLocalMaximum(data, x, y, width, height, radius) {
    const centerProb = data[y * width + x];
    for (let dy = -radius; dy <= radius; dy++) {
      for (let dx = -radius; dx <= radius; dx++) {
        if (dx === 0 && dy === 0) continue;
        const nx = x + dx;
        const ny = y + dy;
        if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
          if (data[ny * width + nx] > centerProb) {
            return false;
          }
        }
      }
    }
    return true;
  }

  /**
   * Groups points by breadth-first region growing.
   *
   * Points are visited in descending `prob` order. From each point, every
   * unvisited point closer than `distanceThreshold * (1 + (1 - prob) * 0.3)`
   * joins the cluster, so lower-valued points search slightly farther.
   * Neighbours are enqueued in ascending sorted-index order, which keeps the
   * cluster order and the order of points inside each cluster the same as an
   * exhaustive scan would produce.
   *
   * A uniform grid limits each search to nearby cells; when the points cannot
   * be bucketed safely the search falls back to scanning all points.
   *
   * @param {{x: number, y: number, prob: number}[]} points - Not mutated.
   * @param {number} distanceThreshold
   * @returns {{x: number, y: number, prob: number}[][]}
   */
  enhancedCluster(points, distanceThreshold) {
    const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);
    const total = sortedPoints.length;
    const radii = sortedPoints.map((p) => distanceThreshold * (1 + (1 - p.prob) * 0.3));
    const grid = this.buildPointGrid(sortedPoints, radii);
    const visited = new Uint8Array(total);
    const clusters = [];

    for (let seed = 0; seed < total; seed++) {
      if (visited[seed]) continue;
      visited[seed] = 1;
      const queue = [seed];
      const cluster = [];

      // Head pointer instead of Array#shift keeps dequeuing O(1).
      for (let head = 0; head < queue.length; head++) {
        const index = queue[head];
        const current = sortedPoints[index];
        cluster.push(current);

        const radius = radii[index];
        // A distance is never below a non-positive (or NaN) radius.
        if (!(radius > 0)) continue;

        const linked = [];
        const consider = (j) => {
          if (visited[j]) return;
          const target = sortedPoints[j];
          const dx = target.x - current.x;
          const dy = target.y - current.y;
          if (Math.sqrt(dx * dx + dy * dy) < radius) linked.push(j);
        };
        if (grid) {
          grid.forEachNear(current, radius, consider);
        } else {
          for (let j = 0; j < total; j++) consider(j);
        }

        // Grid buckets are visited cell by cell; restore index order.
        linked.sort((a, b) => a - b);
        for (const j of linked) {
          visited[j] = 1;
          queue.push(j);
        }
      }
      clusters.push(cluster);
    }
    return clusters;
  }

  /**
   * Internal helper for `enhancedCluster`: buckets points into square cells
   * whose side equals the largest search radius.
   *
   * Returns `null` (caller must scan linearly) when no radius is positive, the
   * largest radius is not finite, or any point does not map to a safe-integer
   * cell index.
   *
   * @param {{x: number, y: number}[]} points
   * @param {number[]} radii - Search radius per point, same order as `points`.
   * @returns {{forEachNear: function(object, number, function(number): void): void} | null}
   */
  buildPointGrid(points, radii) {
    let cellSize = 0;
    for (const radius of radii) {
      if (radius > cellSize) cellSize = radius;
    }
    if (!(cellSize > 0) || !Number.isFinite(cellSize)) return null;

    const cellOf = (value) => Math.floor(value / cellSize);
    const buckets = new Map();
    let minCx = Infinity;
    let maxCx = -Infinity;
    let minCy = Infinity;
    let maxCy = -Infinity;

    for (let i = 0; i < points.length; i++) {
      const cx = cellOf(points[i].x);
      const cy = cellOf(points[i].y);
      if (!Number.isSafeInteger(cx) || !Number.isSafeInteger(cy)) return null;
      if (cx < minCx) minCx = cx;
      if (cx > maxCx) maxCx = cx;
      if (cy < minCy) minCy = cy;
      if (cy > maxCy) maxCy = cy;
      const key = `${cx},${cy}`;
      const bucket = buckets.get(key);
      if (bucket) {
        bucket.push(i);
      } else {
        buckets.set(key, [i]);
      }
    }

    return {
      // Calls visit(index) for every point stored in a cell intersecting the
      // square [x - radius, x + radius] x [y - radius, y + radius]. Clamping to
      // the occupied cell range keeps both loops finite.
      forEachNear(point, radius, visit) {
        const loX = Math.max(minCx, cellOf(point.x - radius));
        const hiX = Math.min(maxCx, cellOf(point.x + radius));
        const loY = Math.max(minCy, cellOf(point.y - radius));
        const hiY = Math.min(maxCy, cellOf(point.y + radius));
        for (let gx = loX; gx <= hiX; gx++) {
          for (let gy = loY; gy <= hiY; gy++) {
            const bucket = buckets.get(`${gx},${gy}`);
            if (!bucket) continue;
            for (const j of bucket) visit(j);
          }
        }
      },
    };
  }

  /**
   * Clamps each corner of `box` into `[0, width - 1] x [0, height - 1]` of the
   * processed image. Despite the name, no rescaling is applied.
   *
   * @param {object} box - Corners `x1..y4`.
   * @param {{width: number, height: number}} processedImage
   * @returns {object} New box object with clamped corners.
   */
  scaleBoxToProcessedImage(box, processedImage) {
    const { width: processedWidth, height: processedHeight } = processedImage;
    const clamp = (value, max) => Math.max(0, Math.min(max, value));
    return {
      x1: clamp(box.x1, processedWidth - 1),
      y1: clamp(box.y1, processedHeight - 1),
      x2: clamp(box.x2, processedWidth - 1),
      y2: clamp(box.y2, processedHeight - 1),
      x3: clamp(box.x3, processedWidth - 1),
      y3: clamp(box.y3, processedHeight - 1),
      x4: clamp(box.x4, processedWidth - 1),
      y4: clamp(box.y4, processedHeight - 1),
    };
  }
}
export default DetectionProcessor;