// server/utils/detectionProcessor.js
import { Tensor } from 'onnxruntime-node';
import sharp from 'sharp';
/**
 * Runs an ONNX DB-style text-detection model over a preprocessed image and
 * converts the output probability map into axis-aligned quadrilateral text
 * boxes with confidence scores.
 *
 * Usage: construct, then call initialize(session, config) before detectText().
 */
class DetectionProcessor {
  constructor() {
    // ONNX inference session; injected via initialize().
    this.session = null;
    // Detection options ({ detThresh, detBoxThresh }); injected via initialize().
    this.config = null;
  }

  /**
   * Injects the inference session and detection configuration.
   * @param {object} session - onnxruntime InferenceSession for the detection model.
   * @param {object} config - Options: detThresh (per-pixel threshold),
   *   detBoxThresh (per-box average-confidence threshold).
   */
  initialize(session, config) {
    this.session = session;
    this.config = config;
  }

  /**
   * Detects text regions in a preprocessed image.
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<Array<object>>} Quad boxes sorted by confidence; [] on failure.
   */
  async detectText(processedImage) {
    try {
      const inputTensor = await this.prepareDetectionInput(processedImage);
      const outputs = await this.session.run({ [this.session.inputNames[0]]: inputTensor });
      return this.postprocessDetection(outputs, processedImage);
    } catch (error) {
      console.error('文本检测失败:', error);
      return [];
    }
  }

  /**
   * Converts the image buffer into a normalized NCHW float tensor [1, 3, H, W].
   *
   * Bug fix: the previous version derived the channel index from the pixel
   * index, which is always 0 for interleaved data — only the red plane of the
   * tensor was ever populated (G and B stayed zero). All three RGB planes are
   * now filled from sharp's interleaved output; alpha is dropped.
   *
   * NOTE(review): assumes processedImage.width/height match the decoded
   * buffer's dimensions — confirm against the preprocessing step.
   *
   * @param {{buffer: Buffer, width: number, height: number}} processedImage
   * @returns {Promise<Tensor>} float32 tensor scaled to [0, 1].
   */
  async prepareDetectionInput(processedImage) {
    const { buffer, width, height } = processedImage;

    // Decode to raw interleaved pixels; ensureAlpha() guarantees >= 4 channels.
    const { data, info } = await sharp(buffer)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const channels = info.channels;
    const planeSize = height * width;
    const inputData = new Float32Array(3 * planeSize);

    // Interleaved (HWC) -> planar (CHW), each value scaled to [0, 1].
    for (let p = 0; p < planeSize; p++) {
      const src = p * channels;
      inputData[p] = data[src] / 255.0;                     // R plane
      inputData[planeSize + p] = data[src + 1] / 255.0;     // G plane
      inputData[2 * planeSize + p] = data[src + 2] / 255.0; // B plane
    }

    return new Tensor('float32', inputData, [1, 3, height, width]);
  }

  /**
   * Converts the model's probability map into text boxes: threshold pixels,
   * cluster them, and emit one clamped bounding quad per plausible cluster.
   * @param {object} outputs - Map of output name -> tensor from session.run().
   * @param {{width: number, height: number}} processedImage
   * @returns {Array<object>} Boxes sorted by descending confidence; [] on error.
   */
  postprocessDetection(outputs, processedImage) {
    try {
      const boxes = [];
      const detectionOutput = outputs[this.session.outputNames[0]];
      if (!detectionOutput) {
        return boxes;
      }

      const [, , height, width] = detectionOutput.dims;
      const data = detectionOutput.data;

      // Low pixel threshold to favor recall; `??` keeps an explicit 0 usable
      // (the old `||` silently replaced 0 with the default).
      const threshold = this.config.detThresh ?? 0.05;

      // Collect every map pixel above the threshold.
      const points = [];
      for (let y = 0; y < height; y++) {
        const row = y * width;
        for (let x = 0; x < width; x++) {
          const prob = data[row + x];
          if (prob > threshold) {
            points.push({
              x,
              y,
              prob,
              // NOTE(review): localMax is computed but not consumed downstream —
              // kept for compatibility; consider removing if it stays unused.
              localMax: this.isLocalMaximum(data, x, y, width, height, 2)
            });
          }
        }
      }

      if (points.length === 0) {
        return boxes;
      }

      const clusters = this.enhancedCluster(points, 8);
      const boxThreshold = this.config.detBoxThresh ?? 0.1;

      for (const cluster of clusters) {
        // Require at least two supporting points per box.
        if (cluster.length < 2) continue;

        // Bounding box + confidence in one pass (no spread: avoids the
        // engine argument-count limit on very large clusters).
        let minX = Infinity;
        let maxX = -Infinity;
        let minY = Infinity;
        let maxY = -Infinity;
        let probSum = 0;
        for (const p of cluster) {
          if (p.x < minX) minX = p.x;
          if (p.x > maxX) maxX = p.x;
          if (p.y < minY) minY = p.y;
          if (p.y > maxY) maxY = p.y;
          probSum += p.prob;
        }

        const boxWidth = maxX - minX;
        const boxHeight = maxY - minY;

        // Reject degenerate boxes (relaxed size limits for recall).
        if (boxWidth < 2 || boxHeight < 2) continue;

        // Reject extreme aspect ratios (relaxed limits for recall).
        const aspectRatio = boxWidth / boxHeight;
        if (aspectRatio > 100 || aspectRatio < 0.01) continue;

        const avgConfidence = probSum / cluster.length;
        if (avgConfidence > boxThreshold) {
          const box = this.scaleBoxToProcessedImage({
            x1: minX, y1: minY,
            x2: maxX, y2: minY,
            x3: maxX, y3: maxY,
            x4: minX, y4: maxY
          }, processedImage);
          box.confidence = avgConfidence;
          boxes.push(box);
        }
      }

      boxes.sort((a, b) => b.confidence - a.confidence);
      console.log(`✅ 检测到 ${boxes.length} 个文本区域`);
      return boxes;

    } catch (error) {
      console.error('检测后处理错误:', error);
      return [];
    }
  }

  /**
   * Returns true when data[y*width+x] is >= every neighbor within `radius`
   * (Chebyshev distance), i.e. a local maximum of the probability map.
   * @param {Float32Array|Array<number>} data - Row-major probability map.
   * @param {number} radius - Neighborhood radius in map pixels.
   * @returns {boolean}
   */
  isLocalMaximum(data, x, y, width, height, radius) {
    const centerProb = data[y * width + x];
    for (let dy = -radius; dy <= radius; dy++) {
      for (let dx = -radius; dx <= radius; dx++) {
        if (dx === 0 && dy === 0) continue;
        const nx = x + dx;
        const ny = y + dy;
        if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
          if (data[ny * width + nx] > centerProb) {
            return false;
          }
        }
      }
    }
    return true;
  }

  /**
   * Greedy BFS clustering of probability-map points. The neighbor search
   * radius expands (up to 1.5x the base threshold) around low-confidence
   * points so weak strokes can bridge gaps.
   * @param {Array<{x:number, y:number, prob:number}>} points
   * @param {number} distanceThreshold - Base neighbor distance in map pixels.
   * @returns {Array<Array<object>>} Clusters; every input point lands in one.
   */
  enhancedCluster(points, distanceThreshold) {
    const clusters = [];
    const visited = new Set();

    // Seed clusters from high-confidence points first.
    const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);

    for (let i = 0; i < sortedPoints.length; i++) {
      if (visited.has(i)) continue;

      const cluster = [];
      const queue = [i];
      visited.add(i);
      let head = 0; // index-based head: O(1) dequeue vs O(n) Array#shift()

      while (head < queue.length) {
        const currentPoint = sortedPoints[queue[head++]];
        cluster.push(currentPoint);

        // Widen the search radius for weaker points.
        const adaptiveThreshold = distanceThreshold *
          (1 + (1 - currentPoint.prob) * 0.5);

        for (let j = 0; j < sortedPoints.length; j++) {
          if (visited.has(j)) continue;

          const targetPoint = sortedPoints[j];
          const dist = Math.hypot(
            targetPoint.x - currentPoint.x,
            targetPoint.y - currentPoint.y
          );

          if (dist < adaptiveThreshold) {
            queue.push(j);
            visited.add(j);
          }
        }
      }

      // The seed point is always present, so the cluster is never empty.
      clusters.push(cluster);
    }

    return clusters;
  }

  /**
   * Clamps a quad's corners to the processed-image bounds.
   * NOTE(review): despite the name, no scaling is applied — map coordinates
   * appear to already be in processed-image space; confirm the detection map
   * is emitted at full input resolution.
   * @param {object} box - Quad {x1..x4, y1..y4} in map coordinates.
   * @param {{width: number, height: number}} processedImage
   * @returns {object} New quad clamped to [0, dim-1].
   */
  scaleBoxToProcessedImage(box, processedImage) {
    const { width: processedWidth, height: processedHeight } = processedImage;

    const clamp = (value, max) => Math.max(0, Math.min(max, value));

    return {
      x1: clamp(box.x1, processedWidth - 1),
      y1: clamp(box.y1, processedHeight - 1),
      x2: clamp(box.x2, processedWidth - 1),
      y2: clamp(box.y2, processedHeight - 1),
      x3: clamp(box.x3, processedWidth - 1),
      y3: clamp(box.y3, processedHeight - 1),
      x4: clamp(box.x4, processedWidth - 1),
      y4: clamp(box.y4, processedHeight - 1)
    };
  }

  /**
   * Maps a box from processed-image space back to original-image space by
   * undoing the preprocessing scale and padding, then clamping to the
   * original bounds. Only the (x1,y1) and (x3,y3) diagonal is transformed;
   * the quad is rebuilt axis-aligned from those two corners.
   * @param {object} box - Quad with x1/y1, x3/y3 and confidence.
   * @param {object} processedImage - Provides scaleX/Y, paddingX/Y,
   *   originalWidth/Height recorded during preprocessing.
   * @returns {object} Axis-aligned quad in original-image coordinates.
   */
  scaleBoxToOriginalImage(box, processedImage) {
    const {
      scaleX, scaleY,
      paddingX, paddingY,
      originalWidth, originalHeight
    } = processedImage;

    // Undo resize, then undo padding offset.
    const originalX1 = box.x1 * scaleX - paddingX;
    const originalY1 = box.y1 * scaleY - paddingY;
    const originalX3 = box.x3 * scaleX - paddingX;
    const originalY3 = box.y3 * scaleY - paddingY;

    const clamp = (value, max) => Math.max(0, Math.min(max, value));

    return {
      x1: clamp(originalX1, originalWidth - 1),
      y1: clamp(originalY1, originalHeight - 1),
      x2: clamp(originalX3, originalWidth - 1),
      y2: clamp(originalY1, originalHeight - 1),
      x3: clamp(originalX3, originalWidth - 1),
      y3: clamp(originalY3, originalHeight - 1),
      x4: clamp(originalX1, originalWidth - 1),
      y4: clamp(originalY3, originalHeight - 1),
      confidence: box.confidence
    };
  }
}

export default DetectionProcessor;