init
这个提交包含在:
父节点
741771b365
当前提交
33e24b20a7
@ -325,35 +325,6 @@ app.get('/api/files/:id/preview', async (req, res) => {
|
|||||||
res.status(500).json({ error: 'Failed to get file preview' });
|
res.status(500).json({ error: 'Failed to get file preview' });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
// 更新批量OCR接口
|
|
||||||
app.post('/api/ocr/batch-recognize', async (req, res) => {
|
|
||||||
try {
|
|
||||||
const { fileIds, config } = req.body;
|
|
||||||
|
|
||||||
if (!fileIds || !Array.isArray(fileIds)) {
|
|
||||||
return res.status(400).json({ error: 'File IDs array is required' });
|
|
||||||
}
|
|
||||||
|
|
||||||
const filePaths = [];
|
|
||||||
for (const fileId of fileIds) {
|
|
||||||
const file = await fileService.getFileById(parseInt(fileId));
|
|
||||||
if (file) {
|
|
||||||
filePaths.push(file.filePath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const results = await onnxOcrManager.batchRecognize(filePaths, config);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
data: results
|
|
||||||
});
|
|
||||||
|
|
||||||
} catch (error) {
|
|
||||||
console.error('批量ONNX OCR识别失败:', error);
|
|
||||||
res.status(500).json({ error: '批量识别失败: ' + error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
// 获取预处理后的图片
|
// 获取预处理后的图片
|
||||||
app.get('/api/ocr/processed-image', async (req, res) => {
|
app.get('/api/ocr/processed-image', async (req, res) => {
|
||||||
try {
|
try {
|
||||||
|
|||||||
@ -6,27 +6,41 @@ class DetectionProcessor {
|
|||||||
constructor() {
|
constructor() {
|
||||||
this.session = null;
|
this.session = null;
|
||||||
this.config = null;
|
this.config = null;
|
||||||
|
this.logger = {
|
||||||
|
info: (msg, ...args) => console.log(`🔍 [检测] ${msg}`, ...args),
|
||||||
|
error: (msg, ...args) => console.error(`❌ [检测] ${msg}`, ...args),
|
||||||
|
debug: (msg, ...args) => console.log(`🐛 [检测] ${msg}`, ...args)
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
initialize(session, config) {
|
initialize(session, config) {
|
||||||
this.session = session;
|
this.session = session;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
|
this.logger.info('检测处理器初始化完成');
|
||||||
}
|
}
|
||||||
|
|
||||||
async detectText(processedImage) {
|
async detectText(processedImage) {
|
||||||
|
const startTime = Date.now();
|
||||||
|
this.logger.info('开始文本检测');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const inputTensor = await this.prepareDetectionInput(processedImage);
|
const inputTensor = await this.prepareDetectionInput(processedImage);
|
||||||
const outputs = await this.session.run({ [this.session.inputNames[0]]: inputTensor });
|
const outputs = await this.session.run({ [this.session.inputNames[0]]: inputTensor });
|
||||||
const textBoxes = this.postprocessDetection(outputs, processedImage);
|
const textBoxes = this.postprocessDetection(outputs, processedImage);
|
||||||
|
|
||||||
|
const processingTime = Date.now() - startTime;
|
||||||
|
this.logger.info(`检测完成: ${textBoxes.length}个区域, 耗时${processingTime}ms`);
|
||||||
|
|
||||||
return textBoxes;
|
return textBoxes;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('文本检测失败:', error);
|
this.logger.error('检测失败', error);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async prepareDetectionInput(processedImage) {
|
async prepareDetectionInput(processedImage) {
|
||||||
const { buffer, width, height } = processedImage;
|
const { buffer, width, height } = processedImage;
|
||||||
|
this.logger.debug(`准备检测输入: ${width}x${height}`);
|
||||||
|
|
||||||
const imageData = await sharp(buffer)
|
const imageData = await sharp(buffer)
|
||||||
.ensureAlpha()
|
.ensureAlpha()
|
||||||
@ -37,68 +51,123 @@ class DetectionProcessor {
|
|||||||
const data = imageData.data;
|
const data = imageData.data;
|
||||||
const channels = imageData.info.channels;
|
const channels = imageData.info.channels;
|
||||||
|
|
||||||
|
// 优化数据填充逻辑
|
||||||
for (let i = 0; i < data.length; i += channels) {
|
for (let i = 0; i < data.length; i += channels) {
|
||||||
const pixelIndex = Math.floor(i / channels);
|
const pixelIndex = Math.floor(i / channels);
|
||||||
const channel = Math.floor(pixelIndex / (height * width));
|
const y = Math.floor(pixelIndex / width);
|
||||||
const posInChannel = pixelIndex % (height * width);
|
const x = pixelIndex % width;
|
||||||
|
|
||||||
if (channel < 3) {
|
|
||||||
const y = Math.floor(posInChannel / width);
|
|
||||||
const x = posInChannel % width;
|
|
||||||
const inputIndex = channel * height * width + y * width + x;
|
|
||||||
|
|
||||||
|
for (let c = 0; c < 3; c++) {
|
||||||
|
const inputIndex = c * height * width + y * width + x;
|
||||||
if (inputIndex < inputData.length) {
|
if (inputIndex < inputData.length) {
|
||||||
inputData[inputIndex] = data[i] / 255.0;
|
inputData[inputIndex] = data[i] / 255.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.logger.debug('检测输入张量准备完成');
|
||||||
return new Tensor('float32', inputData, [1, 3, height, width]);
|
return new Tensor('float32', inputData, [1, 3, height, width]);
|
||||||
}
|
}
|
||||||
|
|
||||||
postprocessDetection(outputs, processedImage) {
|
postprocessDetection(outputs, processedImage) {
|
||||||
|
this.logger.debug('开始检测后处理');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const boxes = [];
|
const boxes = [];
|
||||||
const outputNames = this.session.outputNames;
|
const outputNames = this.session.outputNames;
|
||||||
const detectionOutput = outputs[outputNames[0]];
|
const detectionOutput = outputs[outputNames[0]];
|
||||||
|
|
||||||
if (!detectionOutput) {
|
if (!detectionOutput) {
|
||||||
|
this.logger.debug('检测输出为空');
|
||||||
return boxes;
|
return boxes;
|
||||||
}
|
}
|
||||||
|
|
||||||
const [batch, channels, height, width] = detectionOutput.dims;
|
const [batch, channels, height, width] = detectionOutput.dims;
|
||||||
const data = detectionOutput.data;
|
const data = detectionOutput.data;
|
||||||
|
|
||||||
// 降低检测阈值,提高召回率
|
// 动态阈值调整
|
||||||
const threshold = this.config.detThresh || 0.05;
|
const baseThreshold = this.config.detThresh || 0.05;
|
||||||
const points = [];
|
const adaptiveThreshold = this.calculateAdaptiveThreshold(data, baseThreshold);
|
||||||
|
|
||||||
|
this.logger.debug(`使用检测阈值: ${adaptiveThreshold.toFixed(4)}`);
|
||||||
|
|
||||||
|
const points = this.collectDetectionPoints(data, width, height, adaptiveThreshold);
|
||||||
|
|
||||||
|
if (points.length === 0) {
|
||||||
|
this.logger.debug('未检测到有效文本点');
|
||||||
|
return boxes;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.logger.debug(`收集到 ${points.length} 个检测点`);
|
||||||
|
const clusters = this.enhancedCluster(points, this.config.clusterDistance || 8);
|
||||||
|
this.logger.debug(`聚类得到 ${clusters.length} 个区域`);
|
||||||
|
|
||||||
|
const validBoxes = this.filterAndScaleBoxes(clusters, processedImage);
|
||||||
|
this.logger.info(`生成 ${validBoxes.length} 个有效文本框`);
|
||||||
|
|
||||||
|
return validBoxes.sort((a, b) => b.confidence - a.confidence);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('检测后处理错误', error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
collectDetectionPoints(data, width, height, threshold) {
|
||||||
|
const points = [];
|
||||||
|
let totalProb = 0;
|
||||||
|
let maxProb = 0;
|
||||||
|
|
||||||
// 改进的点收集逻辑
|
|
||||||
for (let y = 0; y < height; y++) {
|
for (let y = 0; y < height; y++) {
|
||||||
for (let x = 0; x < width; x++) {
|
for (let x = 0; x < width; x++) {
|
||||||
const idx = y * width + x;
|
const idx = y * width + x;
|
||||||
const prob = data[idx];
|
const prob = data[idx];
|
||||||
|
|
||||||
if (prob > threshold) {
|
if (prob > threshold) {
|
||||||
|
totalProb += prob;
|
||||||
|
maxProb = Math.max(maxProb, prob);
|
||||||
points.push({
|
points.push({
|
||||||
x,
|
x, y, prob,
|
||||||
y,
|
|
||||||
prob,
|
|
||||||
localMax: this.isLocalMaximum(data, x, y, width, height, 2)
|
localMax: this.isLocalMaximum(data, x, y, width, height, 2)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (points.length === 0) {
|
if (points.length > 0) {
|
||||||
return boxes;
|
this.logger.debug(`检测点统计: 平均置信度 ${(totalProb/points.length).toFixed(4)}, 最大置信度 ${maxProb.toFixed(4)}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 改进的聚类算法
|
return points;
|
||||||
const clusters = this.enhancedCluster(points, 8);
|
}
|
||||||
|
|
||||||
|
calculateAdaptiveThreshold(data, baseThreshold) {
|
||||||
|
// 基于图像特性动态调整阈值
|
||||||
|
let sum = 0;
|
||||||
|
let count = 0;
|
||||||
|
const sampleSize = Math.min(1000, data.length);
|
||||||
|
|
||||||
|
for (let i = 0; i < sampleSize; i++) {
|
||||||
|
const idx = Math.floor(Math.random() * data.length);
|
||||||
|
if (data[idx] > baseThreshold) {
|
||||||
|
sum += data[idx];
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count === 0) return baseThreshold;
|
||||||
|
|
||||||
|
const mean = sum / count;
|
||||||
|
return Math.min(baseThreshold * 1.5, mean * 0.8);
|
||||||
|
}
|
||||||
|
|
||||||
|
filterAndScaleBoxes(clusters, processedImage) {
|
||||||
|
const boxes = [];
|
||||||
|
const minPoints = this.config.minClusterPoints || 2;
|
||||||
|
const boxThreshold = this.config.detBoxThresh || 0.1;
|
||||||
|
|
||||||
for (const cluster of clusters) {
|
for (const cluster of clusters) {
|
||||||
// 降低最小点数要求
|
if (cluster.length < minPoints) continue;
|
||||||
if (cluster.length < 2) continue;
|
|
||||||
|
|
||||||
const minX = Math.min(...cluster.map(p => p.x));
|
const minX = Math.min(...cluster.map(p => p.x));
|
||||||
const maxX = Math.max(...cluster.map(p => p.x));
|
const maxX = Math.max(...cluster.map(p => p.x));
|
||||||
@ -108,17 +177,14 @@ class DetectionProcessor {
|
|||||||
const boxWidth = maxX - minX;
|
const boxWidth = maxX - minX;
|
||||||
const boxHeight = maxY - minY;
|
const boxHeight = maxY - minY;
|
||||||
|
|
||||||
// 放宽尺寸限制
|
// 放宽尺寸限制,提高小文本检测
|
||||||
if (boxWidth < 2 || boxHeight < 2) continue;
|
if (boxWidth < 1 || boxHeight < 1) continue;
|
||||||
|
|
||||||
const aspectRatio = boxWidth / boxHeight;
|
const aspectRatio = boxWidth / boxHeight;
|
||||||
// 放宽宽高比限制
|
if (aspectRatio > 150 || aspectRatio < 0.005) continue;
|
||||||
if (aspectRatio > 100 || aspectRatio < 0.01) continue;
|
|
||||||
|
|
||||||
const avgConfidence = cluster.reduce((sum, p) => sum + p.prob, 0) / cluster.length;
|
const avgConfidence = cluster.reduce((sum, p) => sum + p.prob, 0) / cluster.length;
|
||||||
|
|
||||||
// 降低框置信度阈值
|
|
||||||
const boxThreshold = this.config.detBoxThresh || 0.1;
|
|
||||||
if (avgConfidence > boxThreshold) {
|
if (avgConfidence > boxThreshold) {
|
||||||
const box = this.scaleBoxToProcessedImage({
|
const box = this.scaleBoxToProcessedImage({
|
||||||
x1: minX, y1: minY,
|
x1: minX, y1: minY,
|
||||||
@ -131,17 +197,9 @@ class DetectionProcessor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boxes.sort((a, b) => b.confidence - a.confidence);
|
|
||||||
console.log(`✅ 检测到 ${boxes.length} 个文本区域`);
|
|
||||||
return boxes;
|
return boxes;
|
||||||
|
|
||||||
} catch (error) {
|
|
||||||
console.error('检测后处理错误:', error);
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 添加局部最大值检测
|
|
||||||
isLocalMaximum(data, x, y, width, height, radius) {
|
isLocalMaximum(data, x, y, width, height, radius) {
|
||||||
const centerProb = data[y * width + x];
|
const centerProb = data[y * width + x];
|
||||||
for (let dy = -radius; dy <= radius; dy++) {
|
for (let dy = -radius; dy <= radius; dy++) {
|
||||||
@ -159,12 +217,9 @@ class DetectionProcessor {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 改进的聚类算法
|
|
||||||
enhancedCluster(points, distanceThreshold) {
|
enhancedCluster(points, distanceThreshold) {
|
||||||
const clusters = [];
|
const clusters = [];
|
||||||
const visited = new Set();
|
const visited = new Set();
|
||||||
|
|
||||||
// 按概率降序排序,优先处理高置信度点
|
|
||||||
const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);
|
const sortedPoints = [...points].sort((a, b) => b.prob - a.prob);
|
||||||
|
|
||||||
for (let i = 0; i < sortedPoints.length; i++) {
|
for (let i = 0; i < sortedPoints.length; i++) {
|
||||||
@ -180,8 +235,7 @@ class DetectionProcessor {
|
|||||||
cluster.push(currentPoint);
|
cluster.push(currentPoint);
|
||||||
|
|
||||||
// 动态调整搜索半径
|
// 动态调整搜索半径
|
||||||
const adaptiveThreshold = distanceThreshold *
|
const adaptiveThreshold = distanceThreshold * (1 + (1 - currentPoint.prob) * 0.3);
|
||||||
(1 + (1 - currentPoint.prob) * 0.5);
|
|
||||||
|
|
||||||
for (let j = 0; j < sortedPoints.length; j++) {
|
for (let j = 0; j < sortedPoints.length; j++) {
|
||||||
if (visited.has(j)) continue;
|
if (visited.has(j)) continue;
|
||||||
@ -209,61 +263,17 @@ class DetectionProcessor {
|
|||||||
|
|
||||||
scaleBoxToProcessedImage(box, processedImage) {
|
scaleBoxToProcessedImage(box, processedImage) {
|
||||||
const { width: processedWidth, height: processedHeight } = processedImage;
|
const { width: processedWidth, height: processedHeight } = processedImage;
|
||||||
|
|
||||||
const scaledBox = {
|
|
||||||
x1: box.x1,
|
|
||||||
y1: box.y1,
|
|
||||||
x2: box.x2,
|
|
||||||
y2: box.y2,
|
|
||||||
x3: box.x3,
|
|
||||||
y3: box.y3,
|
|
||||||
x4: box.x4,
|
|
||||||
y4: box.y4
|
|
||||||
};
|
|
||||||
|
|
||||||
const clamp = (value, max) => Math.max(0, Math.min(max, value));
|
const clamp = (value, max) => Math.max(0, Math.min(max, value));
|
||||||
|
|
||||||
return {
|
return {
|
||||||
x1: clamp(scaledBox.x1, processedWidth - 1),
|
x1: clamp(box.x1, processedWidth - 1),
|
||||||
y1: clamp(scaledBox.y1, processedHeight - 1),
|
y1: clamp(box.y1, processedHeight - 1),
|
||||||
x2: clamp(scaledBox.x2, processedWidth - 1),
|
x2: clamp(box.x2, processedWidth - 1),
|
||||||
y2: clamp(scaledBox.y2, processedHeight - 1),
|
y2: clamp(box.y2, processedHeight - 1),
|
||||||
x3: clamp(scaledBox.x3, processedWidth - 1),
|
x3: clamp(box.x3, processedWidth - 1),
|
||||||
y3: clamp(scaledBox.y3, processedHeight - 1),
|
y3: clamp(box.y3, processedHeight - 1),
|
||||||
x4: clamp(scaledBox.x4, processedWidth - 1),
|
x4: clamp(box.x4, processedWidth - 1),
|
||||||
y4: clamp(scaledBox.y4, processedHeight - 1)
|
y4: clamp(box.y4, processedHeight - 1)
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
scaleBoxToOriginalImage(box, processedImage) {
|
|
||||||
const {
|
|
||||||
scaleX, scaleY,
|
|
||||||
paddingX, paddingY,
|
|
||||||
originalWidth, originalHeight
|
|
||||||
} = processedImage;
|
|
||||||
|
|
||||||
const paddedX1 = box.x1 * scaleX;
|
|
||||||
const paddedY1 = box.y1 * scaleY;
|
|
||||||
const paddedX3 = box.x3 * scaleX;
|
|
||||||
const paddedY3 = box.y3 * scaleY;
|
|
||||||
|
|
||||||
const originalX1 = paddedX1 - paddingX;
|
|
||||||
const originalY1 = paddedY1 - paddingY;
|
|
||||||
const originalX3 = paddedX3 - paddingX;
|
|
||||||
const originalY3 = paddedY3 - paddingY;
|
|
||||||
|
|
||||||
const clamp = (value, max) => Math.max(0, Math.min(max, value));
|
|
||||||
|
|
||||||
return {
|
|
||||||
x1: clamp(originalX1, originalWidth - 1),
|
|
||||||
y1: clamp(originalY1, originalHeight - 1),
|
|
||||||
x2: clamp(originalX3, originalWidth - 1),
|
|
||||||
y2: clamp(originalY1, originalHeight - 1),
|
|
||||||
x3: clamp(originalX3, originalWidth - 1),
|
|
||||||
y3: clamp(originalY3, originalHeight - 1),
|
|
||||||
x4: clamp(originalX1, originalWidth - 1),
|
|
||||||
y4: clamp(originalY3, originalHeight - 1),
|
|
||||||
confidence: box.confidence
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,20 +4,28 @@ import sharp from 'sharp';
|
|||||||
class ImagePreprocessor {
|
class ImagePreprocessor {
|
||||||
constructor() {
|
constructor() {
|
||||||
this.tempDir = './temp/processed';
|
this.tempDir = './temp/processed';
|
||||||
|
this.logger = {
|
||||||
|
info: (msg, ...args) => console.log(`🖼️ [预处理] ${msg}`, ...args),
|
||||||
|
error: (msg, ...args) => console.error(`❌ [预处理] ${msg}`, ...args),
|
||||||
|
debug: (msg, ...args) => console.debug(`❌ [预处理] ${msg}`, ...args)
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async preprocessWithPadding(imagePath, config) {
|
async preprocessWithPadding(imagePath, config) {
|
||||||
|
const startTime = Date.now();
|
||||||
|
this.logger.info(`开始预处理: ${imagePath}`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const metadata = await sharp(imagePath).metadata();
|
const metadata = await sharp(imagePath).metadata();
|
||||||
|
this.logger.info(`原始尺寸: ${metadata.width}x${metadata.height}`);
|
||||||
|
|
||||||
// 减少填充,避免过度改变图像
|
// 智能填充策略
|
||||||
const minPadding = 30;
|
const { paddingX, paddingY } = this.calculateSmartPadding(metadata);
|
||||||
const paddingX = Math.max(minPadding, Math.floor(metadata.width * 0.05));
|
|
||||||
const paddingY = Math.max(minPadding, Math.floor(metadata.height * 0.05));
|
|
||||||
|
|
||||||
const paddedWidth = metadata.width + paddingX * 2;
|
const paddedWidth = metadata.width + paddingX * 2;
|
||||||
const paddedHeight = metadata.height + paddingY * 2;
|
const paddedHeight = metadata.height + paddingY * 2;
|
||||||
|
|
||||||
|
this.logger.debug(`添加填充: ${paddingX}x${paddingY}, 新尺寸: ${paddedWidth}x${paddedHeight}`);
|
||||||
|
|
||||||
const paddedBuffer = await sharp(imagePath)
|
const paddedBuffer = await sharp(imagePath)
|
||||||
.extend({
|
.extend({
|
||||||
top: paddingY,
|
top: paddingY,
|
||||||
@ -39,30 +47,42 @@ class ImagePreprocessor {
|
|||||||
.png()
|
.png()
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
|
|
||||||
console.log(`🖼️ 图像预处理完成: ${metadata.width}x${metadata.height} -> ${width}x${height}`);
|
const processingTime = Date.now() - startTime;
|
||||||
|
this.logger.info(`预处理完成: ${width}x${height}, 耗时${processingTime}ms`);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
processedImage: {
|
processedImage: {
|
||||||
buffer: resizedBuffer,
|
buffer: resizedBuffer,
|
||||||
width,
|
width, height,
|
||||||
height,
|
|
||||||
originalWidth: metadata.width,
|
originalWidth: metadata.width,
|
||||||
originalHeight: metadata.height,
|
originalHeight: metadata.height,
|
||||||
paddedWidth: paddedWidth,
|
paddedWidth, paddedHeight,
|
||||||
paddedHeight: paddedHeight,
|
paddingX, paddingY,
|
||||||
paddingX,
|
|
||||||
paddingY,
|
|
||||||
scaleX: paddedWidth / width,
|
scaleX: paddedWidth / width,
|
||||||
scaleY: paddedHeight / height
|
scaleY: paddedHeight / height
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('预处理错误:', error);
|
this.logger.error('预处理错误', error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
calculateSmartPadding(metadata) {
|
||||||
|
const basePadding = 20;
|
||||||
|
const minPadding = 15;
|
||||||
|
|
||||||
|
// 根据图像尺寸动态调整填充
|
||||||
|
const widthRatio = Math.max(0.02, Math.min(0.08, 100 / metadata.width));
|
||||||
|
const heightRatio = Math.max(0.02, Math.min(0.08, 100 / metadata.height));
|
||||||
|
|
||||||
|
return {
|
||||||
|
paddingX: Math.max(minPadding, Math.floor(metadata.width * widthRatio)),
|
||||||
|
paddingY: Math.max(minPadding, Math.floor(metadata.height * heightRatio))
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
resizeForDetection(metadata, config) {
|
resizeForDetection(metadata, config) {
|
||||||
const { width, height } = metadata;
|
const { width, height } = metadata;
|
||||||
const limitSideLen = config.detLimitSideLen || 960;
|
const limitSideLen = config.detLimitSideLen || 960;
|
||||||
@ -70,15 +90,18 @@ class ImagePreprocessor {
|
|||||||
let ratio = 1;
|
let ratio = 1;
|
||||||
if (Math.max(width, height) > limitSideLen) {
|
if (Math.max(width, height) > limitSideLen) {
|
||||||
ratio = limitSideLen / Math.max(width, height);
|
ratio = limitSideLen / Math.max(width, height);
|
||||||
|
this.logger.debug(`缩放比例: ${ratio.toFixed(4)}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const newWidth = Math.floor(width * ratio);
|
const newWidth = Math.floor(width * ratio);
|
||||||
const newHeight = Math.floor(height * ratio);
|
const newHeight = Math.floor(height * ratio);
|
||||||
|
|
||||||
return {
|
// 确保尺寸是32的倍数
|
||||||
width: Math.max(32, Math.floor(newWidth / 32) * 32),
|
const finalWidth = Math.max(32, Math.floor(newWidth / 32) * 32);
|
||||||
height: Math.max(32, Math.floor(newHeight / 32) * 32)
|
const finalHeight = Math.max(32, Math.floor(newHeight / 32) * 32);
|
||||||
};
|
|
||||||
|
this.logger.debug(`调整后尺寸: ${finalWidth}x${finalHeight}`);
|
||||||
|
return { width: finalWidth, height: finalHeight };
|
||||||
}
|
}
|
||||||
|
|
||||||
async getImageInfo(imagePath) {
|
async getImageInfo(imagePath) {
|
||||||
@ -91,11 +114,9 @@ class ImagePreprocessor {
|
|||||||
processed: false
|
processed: false
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
this.logger.error('获取图像信息失败', error);
|
||||||
return {
|
return {
|
||||||
width: 0,
|
width: 0, height: 0, format: 'unknown', processed: false
|
||||||
height: 0,
|
|
||||||
format: 'unknown',
|
|
||||||
processed: false
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
// server/utils/onnxOcrManager.js
|
// server/utils/onnxOcrManager.js
|
||||||
import { InferenceSession } from 'onnxruntime-node';
|
import { InferenceSession } from 'onnxruntime-node';
|
||||||
|
import sharp from 'sharp';
|
||||||
import fse from 'fs-extra';
|
import fse from 'fs-extra';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
@ -29,31 +30,41 @@ class OnnxOcrManager {
|
|||||||
this.imagePreprocessor = new ImagePreprocessor();
|
this.imagePreprocessor = new ImagePreprocessor();
|
||||||
this.textPostProcessor = new TextPostProcessor();
|
this.textPostProcessor = new TextPostProcessor();
|
||||||
|
|
||||||
// 更新默认配置,优化识别效果
|
this.logger = {
|
||||||
|
info: (msg, ...args) => console.log(`🚀 [OCR管理器] ${msg}`, ...args),
|
||||||
|
error: (msg, ...args) => console.error(`❌ [OCR管理器] ${msg}`, ...args),
|
||||||
|
debug: (msg, ...args) => console.log(`🐛 [OCR管理器] ${msg}`, ...args)
|
||||||
|
};
|
||||||
|
|
||||||
|
// 确保可视化目录存在
|
||||||
|
this.visualizationDir = path.join(process.cwd(), 'temp', 'visualization');
|
||||||
|
fse.ensureDirSync(this.visualizationDir);
|
||||||
|
|
||||||
|
// 优化配置参数
|
||||||
this.defaultConfig = {
|
this.defaultConfig = {
|
||||||
language: 'ch',
|
language: 'ch',
|
||||||
detLimitSideLen: 960,
|
detLimitSideLen: 960,
|
||||||
detThresh: 0.05, // 降低检测阈值
|
detThresh: 0.05,
|
||||||
detBoxThresh: 0.1, // 降低框阈值
|
detBoxThresh: 0.08,
|
||||||
detUnclipRatio: 1.8, // 调整解压缩比例
|
detUnclipRatio: 1.8,
|
||||||
maxTextLength: 50, // 增加最大文本长度
|
maxTextLength: 100,
|
||||||
recImageHeight: 48,
|
recImageHeight: 48,
|
||||||
clsThresh: 0.8, // 降低分类阈值
|
clsThresh: 0.7,
|
||||||
minTextHeight: 2, // 降低最小文本高度
|
minTextHeight: 1,
|
||||||
minTextWidth: 2, // 降低最小文本宽度
|
minTextWidth: 1,
|
||||||
clusterDistance: 8, // 调整聚类距离
|
clusterDistance: 8,
|
||||||
minClusterPoints: 2 // 降低最小聚类点数
|
minClusterPoints: 1
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async initialize(config = {}) {
|
async initialize(config = {}) {
|
||||||
if (this.isInitialized) {
|
if (this.isInitialized) {
|
||||||
console.log('🔁 OCR管理器已初始化');
|
this.logger.info('OCR管理器已初始化');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log('🚀 开始初始化OCR管理器...');
|
this.logger.info('开始初始化OCR管理器...');
|
||||||
await this.validateModelFiles();
|
await this.validateModelFiles();
|
||||||
await this.recognitionProcessor.loadCharacterSet(this.keysPath);
|
await this.recognitionProcessor.loadCharacterSet(this.keysPath);
|
||||||
|
|
||||||
@ -73,10 +84,10 @@ class OnnxOcrManager {
|
|||||||
this.recognitionProcessor.initialize(this.recSession, this.clsSession, mergedConfig);
|
this.recognitionProcessor.initialize(this.recSession, this.clsSession, mergedConfig);
|
||||||
|
|
||||||
this.isInitialized = true;
|
this.isInitialized = true;
|
||||||
console.log('✅ OCR管理器初始化完成');
|
this.logger.info('OCR管理器初始化完成');
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('❌ OCR管理器初始化失败:', error);
|
this.logger.error('初始化失败', error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -94,8 +105,9 @@ class OnnxOcrManager {
|
|||||||
if (!exists) {
|
if (!exists) {
|
||||||
throw new Error(`模型文件不存在: ${filePath}`);
|
throw new Error(`模型文件不存在: ${filePath}`);
|
||||||
}
|
}
|
||||||
|
this.logger.debug(`验证通过: ${name}`);
|
||||||
}
|
}
|
||||||
console.log('✅ 所有模型文件验证通过');
|
this.logger.info('所有模型文件验证通过');
|
||||||
}
|
}
|
||||||
|
|
||||||
async recognizeImage(imagePath, config = {}) {
|
async recognizeImage(imagePath, config = {}) {
|
||||||
@ -112,13 +124,17 @@ class OnnxOcrManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log(`\n🎯 开始OCR识别: ${path.basename(imagePath)}`);
|
this.logger.info(`开始OCR识别: ${path.basename(imagePath)}`);
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
const preprocessResult = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
|
const preprocessResult = await this.imagePreprocessor.preprocessWithPadding(imagePath, config);
|
||||||
const { processedImage } = preprocessResult;
|
const { processedImage } = preprocessResult;
|
||||||
|
|
||||||
const textBoxes = await this.detectionProcessor.detectText(processedImage);
|
const textBoxes = await this.detectionProcessor.detectText(processedImage);
|
||||||
|
|
||||||
|
// 在原始图像上绘制文本框
|
||||||
|
await this.drawTextBoxesOnOriginalImage(imagePath, textBoxes, processedImage);
|
||||||
|
|
||||||
const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(processedImage, textBoxes);
|
const recognitionResults = await this.recognitionProcessor.recognizeTextWithCls(processedImage, textBoxes);
|
||||||
|
|
||||||
const processingTime = Date.now() - startTime;
|
const processingTime = Date.now() - startTime;
|
||||||
@ -138,30 +154,166 @@ class OnnxOcrManager {
|
|||||||
totalPages: 1,
|
totalPages: 1,
|
||||||
rawText,
|
rawText,
|
||||||
imageInfo,
|
imageInfo,
|
||||||
recognitionCount: recognitionResults.length
|
recognitionCount: recognitionResults.length,
|
||||||
|
detectionCount: textBoxes.length,
|
||||||
|
visualizationPath: this.getVisualizationPath(imagePath)
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log(`\n📊 OCR识别统计:`);
|
this.logger.info(`OCR识别完成:
|
||||||
console.log(` - 处理时间: ${processingTime}ms`);
|
- 处理时间: ${processingTime}ms
|
||||||
console.log(` - 检测区域: ${textBoxes.length} 个`);
|
- 检测区域: ${textBoxes.length}个
|
||||||
console.log(` - 成功识别: ${recognitionResults.length} 个`);
|
- 成功识别: ${recognitionResults.length}个
|
||||||
console.log(` - 总体置信度: ${overallConfidence.toFixed(4)}`);
|
- 总体置信度: ${overallConfidence.toFixed(4)}
|
||||||
console.log(` - 最终文本长度: ${rawText.length} 字符`);
|
- 最终文本: ${rawText.length}字符
|
||||||
|
- 可视化图像: ${result.visualizationPath}`);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`❌ OCR识别失败: ${error.message}`);
|
this.logger.error(`OCR识别失败: ${error.message}`);
|
||||||
throw new Error(`OCR识别失败: ${error.message}`);
|
throw new Error(`OCR识别失败: ${error.message}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async drawTextBoxesOnOriginalImage(originalImagePath, textBoxes, processedImage) {
|
||||||
|
try {
|
||||||
|
this.logger.info('开始在原始图像上绘制文本框');
|
||||||
|
|
||||||
|
// 读取原始图像
|
||||||
|
const originalImage = sharp(originalImagePath);
|
||||||
|
const metadata = await originalImage.metadata();
|
||||||
|
|
||||||
|
// 创建SVG绘制指令
|
||||||
|
const svgOverlay = this.createTextBoxesSVG(textBoxes, processedImage, metadata);
|
||||||
|
|
||||||
|
// 将SVG叠加到原始图像上
|
||||||
|
const visualizationPath = this.getVisualizationPath(originalImagePath);
|
||||||
|
await originalImage
|
||||||
|
.composite([{
|
||||||
|
input: Buffer.from(svgOverlay),
|
||||||
|
top: 0,
|
||||||
|
left: 0
|
||||||
|
}])
|
||||||
|
.png()
|
||||||
|
.toFile(visualizationPath);
|
||||||
|
|
||||||
|
this.logger.info(`文本框可视化图像已保存: ${visualizationPath}`);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('绘制文本框失败', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
createTextBoxesSVG(textBoxes, processedImage, originalMetadata) {
|
||||||
|
const { width, height } = originalMetadata;
|
||||||
|
|
||||||
|
let svg = `<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg">`;
|
||||||
|
|
||||||
|
// 定义样式
|
||||||
|
svg += `
|
||||||
|
<style>
|
||||||
|
.text-box {
|
||||||
|
fill: none;
|
||||||
|
stroke: #ff0000;
|
||||||
|
stroke-width: 2;
|
||||||
|
}
|
||||||
|
.text-box-high-conf {
|
||||||
|
fill: none;
|
||||||
|
stroke: #00ff00;
|
||||||
|
stroke-width: 2;
|
||||||
|
}
|
||||||
|
.text-label {
|
||||||
|
font-size: 12px;
|
||||||
|
fill: #ff0000;
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
`;
|
||||||
|
|
||||||
|
textBoxes.forEach((box, index) => {
|
||||||
|
// 将处理后的图像坐标转换回原始图像坐标
|
||||||
|
const originalBox = this.scaleBoxToOriginalImage(box, processedImage);
|
||||||
|
|
||||||
|
// 根据置信度选择颜色
|
||||||
|
const boxClass = box.confidence > 0.8 ? 'text-box-high-conf' : 'text-box';
|
||||||
|
|
||||||
|
// 绘制文本框(多边形)
|
||||||
|
const points = [
|
||||||
|
`${originalBox.x1},${originalBox.y1}`,
|
||||||
|
`${originalBox.x2},${originalBox.y2}`,
|
||||||
|
`${originalBox.x3},${originalBox.y3}`,
|
||||||
|
`${originalBox.x4},${originalBox.y4}`
|
||||||
|
].join(' ');
|
||||||
|
|
||||||
|
svg += `<polygon class="${boxClass}" points="${points}" />`;
|
||||||
|
|
||||||
|
// 在框上方添加索引和置信度标签
|
||||||
|
const labelX = Math.min(originalBox.x1, originalBox.x2, originalBox.x3, originalBox.x4);
|
||||||
|
const labelY = Math.min(originalBox.y1, originalBox.y2, originalBox.y3, originalBox.y4) - 5;
|
||||||
|
|
||||||
|
if (labelY > 15) { // 确保标签在图像范围内
|
||||||
|
svg += `<text class="text-label" x="${labelX}" y="${labelY}">${index + 1} (${box.confidence.toFixed(2)})</text>`;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
svg += '</svg>';
|
||||||
|
return svg;
|
||||||
|
}
|
||||||
|
|
||||||
|
scaleBoxToOriginalImage(box, processedImage) {
|
||||||
|
const {
|
||||||
|
scaleX, scaleY,
|
||||||
|
paddingX, paddingY,
|
||||||
|
originalWidth, originalHeight
|
||||||
|
} = processedImage;
|
||||||
|
|
||||||
|
// 将处理后的图像坐标转换回填充后的图像坐标
|
||||||
|
const paddedX1 = box.x1 * scaleX;
|
||||||
|
const paddedY1 = box.y1 * scaleY;
|
||||||
|
const paddedX2 = box.x2 * scaleX;
|
||||||
|
const paddedY2 = box.y2 * scaleY;
|
||||||
|
const paddedX3 = box.x3 * scaleX;
|
||||||
|
const paddedY3 = box.y3 * scaleY;
|
||||||
|
const paddedX4 = box.x4 * scaleX;
|
||||||
|
const paddedY4 = box.y4 * scaleY;
|
||||||
|
|
||||||
|
// 去除填充,得到原始图像坐标
|
||||||
|
const originalX1 = paddedX1 - paddingX;
|
||||||
|
const originalY1 = paddedY1 - paddingY;
|
||||||
|
const originalX2 = paddedX2 - paddingX;
|
||||||
|
const originalY2 = paddedY2 - paddingY;
|
||||||
|
const originalX3 = paddedX3 - paddingX;
|
||||||
|
const originalY3 = paddedY3 - paddingY;
|
||||||
|
const originalX4 = paddedX4 - paddingX;
|
||||||
|
const originalY4 = paddedY4 - paddingY;
|
||||||
|
|
||||||
|
const clamp = (value, max) => Math.max(0, Math.min(max, value));
|
||||||
|
|
||||||
|
return {
|
||||||
|
x1: clamp(originalX1, originalWidth - 1),
|
||||||
|
y1: clamp(originalY1, originalHeight - 1),
|
||||||
|
x2: clamp(originalX2, originalWidth - 1),
|
||||||
|
y2: clamp(originalY2, originalHeight - 1),
|
||||||
|
x3: clamp(originalX3, originalWidth - 1),
|
||||||
|
y3: clamp(originalY3, originalHeight - 1),
|
||||||
|
x4: clamp(originalX4, originalWidth - 1),
|
||||||
|
y4: clamp(originalY4, originalHeight - 1),
|
||||||
|
confidence: box.confidence
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
getVisualizationPath(originalImagePath) {
|
||||||
|
const originalName = path.basename(originalImagePath, path.extname(originalImagePath));
|
||||||
|
const timestamp = Date.now();
|
||||||
|
return path.join(this.visualizationDir, `${originalName}-detection-${timestamp}.png`);
|
||||||
|
}
|
||||||
|
|
||||||
getStatus() {
|
getStatus() {
|
||||||
return {
|
return {
|
||||||
isInitialized: this.isInitialized,
|
isInitialized: this.isInitialized,
|
||||||
isOffline: true,
|
isOffline: true,
|
||||||
engine: 'PP-OCRv3 (ONNX Runtime)',
|
engine: 'PP-OCRv3 (ONNX Runtime)',
|
||||||
version: '1.0.0',
|
version: '2.0.0',
|
||||||
models: {
|
models: {
|
||||||
detection: path.relative(process.cwd(), this.detModelPath),
|
detection: path.relative(process.cwd(), this.detModelPath),
|
||||||
recognition: path.relative(process.cwd(), this.recModelPath),
|
recognition: path.relative(process.cwd(), this.recModelPath),
|
||||||
@ -172,28 +324,11 @@ class OnnxOcrManager {
|
|||||||
detThresh: this.defaultConfig.detThresh,
|
detThresh: this.defaultConfig.detThresh,
|
||||||
detBoxThresh: this.defaultConfig.detBoxThresh,
|
detBoxThresh: this.defaultConfig.detBoxThresh,
|
||||||
clsThresh: this.defaultConfig.clsThresh,
|
clsThresh: this.defaultConfig.clsThresh,
|
||||||
preprocessing: 'enabled with padding'
|
preprocessing: 'enhanced with smart padding'
|
||||||
},
|
},
|
||||||
backend: 'CPU'
|
backend: 'CPU'
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async terminate() {
|
|
||||||
if (this.detSession) {
|
|
||||||
this.detSession.release();
|
|
||||||
this.detSession = null;
|
|
||||||
}
|
|
||||||
if (this.recSession) {
|
|
||||||
this.recSession.release();
|
|
||||||
this.recSession = null;
|
|
||||||
}
|
|
||||||
if (this.clsSession) {
|
|
||||||
this.clsSession.release();
|
|
||||||
this.clsSession = null;
|
|
||||||
}
|
|
||||||
this.isInitialized = false;
|
|
||||||
console.log('🛑 OCR管理器已终止');
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const onnxOcrManager = new OnnxOcrManager();
|
const onnxOcrManager = new OnnxOcrManager();
|
||||||
|
|||||||
@ -61,7 +61,7 @@ class RecognitionProcessor {
|
|||||||
recognitionImage = await this.textRegionCropper.rotateImage(textRegion.buffer, 180);
|
recognitionImage = await this.textRegionCropper.rotateImage(textRegion.buffer, 180);
|
||||||
}
|
}
|
||||||
|
|
||||||
const textResult = await this.textRecognizer.recognizeText(recognitionImage);
|
const textResult = await this.textRecognizer.recognizeText(recognitionImage, i + 1);
|
||||||
|
|
||||||
if (textResult.text && textResult.text.trim().length > 0 && textResult.confidence > 0.05) {
|
if (textResult.text && textResult.text.trim().length > 0 && textResult.confidence > 0.05) {
|
||||||
const originalBox = this.scaleBoxToOriginalImage(box, processedImage);
|
const originalBox = this.scaleBoxToOriginalImage(box, processedImage);
|
||||||
|
|||||||
@ -10,12 +10,23 @@ class TextRecognizer {
|
|||||||
this.config = null;
|
this.config = null;
|
||||||
this.characterSet = [];
|
this.characterSet = [];
|
||||||
this.debugDir = path.join(process.cwd(), 'temp', 'debug');
|
this.debugDir = path.join(process.cwd(), 'temp', 'debug');
|
||||||
|
this.preprocessedDir = path.join(process.cwd(), 'temp', 'preprocessed');
|
||||||
|
this.logger = {
|
||||||
|
info: (msg, ...args) => console.log(`🔤 [识别] ${msg}`, ...args),
|
||||||
|
error: (msg, ...args) => console.error(`❌ [识别] ${msg}`, ...args),
|
||||||
|
debug: (msg, ...args) => console.log(`🐛 [识别] ${msg}`, ...args),
|
||||||
|
warn: (msg, ...args) => console.warn(`🐛 [识别] ${msg}`, ...args)
|
||||||
|
};
|
||||||
|
|
||||||
|
// 确保目录存在
|
||||||
fse.ensureDirSync(this.debugDir);
|
fse.ensureDirSync(this.debugDir);
|
||||||
|
fse.ensureDirSync(this.preprocessedDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
initialize(recSession, config) {
|
initialize(recSession, config) {
|
||||||
this.recSession = recSession;
|
this.recSession = recSession;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
|
this.logger.info('文本识别器初始化完成');
|
||||||
}
|
}
|
||||||
|
|
||||||
async loadCharacterSet(keysPath) {
|
async loadCharacterSet(keysPath) {
|
||||||
@ -24,299 +35,332 @@ class TextRecognizer {
|
|||||||
this.characterSet = [];
|
this.characterSet = [];
|
||||||
const lines = keysContent.split('\n');
|
const lines = keysContent.split('\n');
|
||||||
|
|
||||||
|
// 使用提供的字符集文件
|
||||||
|
const uniqueChars = new Set();
|
||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
const trimmed = line.trim();
|
const trimmed = line.trim();
|
||||||
|
// 跳过空行和注释行
|
||||||
if (trimmed && !trimmed.startsWith('#')) {
|
if (trimmed && !trimmed.startsWith('#')) {
|
||||||
for (const char of trimmed) {
|
// 将每行作为一个完整的字符处理
|
||||||
if (char.trim() && !this.characterSet.includes(char)) {
|
uniqueChars.add(trimmed);
|
||||||
this.characterSet.push(char);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.characterSet = Array.from(uniqueChars);
|
||||||
|
|
||||||
if (this.characterSet.length === 0) {
|
if (this.characterSet.length === 0) {
|
||||||
throw new Error('字符集文件为空或格式不正确');
|
throw new Error('字符集文件为空或格式不正确');
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`✅ 字符集加载完成,共 ${this.characterSet.length} 个字符`);
|
this.logger.info(`字符集加载完成: ${this.characterSet.length}个字符`);
|
||||||
|
|
||||||
|
// 记录字符集统计信息
|
||||||
|
const charTypes = {
|
||||||
|
chinese: 0,
|
||||||
|
english: 0,
|
||||||
|
digit: 0,
|
||||||
|
punctuation: 0,
|
||||||
|
other: 0
|
||||||
|
};
|
||||||
|
|
||||||
|
this.characterSet.forEach(char => {
|
||||||
|
if (/[\u4e00-\u9fff]/.test(char)) {
|
||||||
|
charTypes.chinese++;
|
||||||
|
} else if (/[a-zA-Z]/.test(char)) {
|
||||||
|
charTypes.english++;
|
||||||
|
} else if (/[0-9]/.test(char)) {
|
||||||
|
charTypes.digit++;
|
||||||
|
} else if (/[,。!?;:""()【】《》…—·]/.test(char)) {
|
||||||
|
charTypes.punctuation++;
|
||||||
|
} else {
|
||||||
|
charTypes.other++;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
this.logger.debug(`字符集统计: 中文${charTypes.chinese}, 英文${charTypes.english}, 数字${charTypes.digit}, 标点${charTypes.punctuation}, 其他${charTypes.other}`);
|
||||||
|
this.logger.debug(`前20个字符: ${this.characterSet.slice(0, 20).join('')}`);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('❌ 加载字符集失败,使用默认字符集:', error.message);
|
this.logger.error('加载字符集失败', error.message);
|
||||||
this.characterSet = this.getDefaultCharacterSet();
|
// 完全使用提供的字符集,失败时抛出错误
|
||||||
|
throw new Error(`字符集加载失败: ${error.message}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
getDefaultCharacterSet() {
|
|
||||||
const defaultSet = [];
|
|
||||||
for (let i = 0; i <= 9; i++) defaultSet.push(i.toString());
|
|
||||||
for (let i = 97; i <= 122; i++) defaultSet.push(String.fromCharCode(i));
|
|
||||||
for (let i = 65; i <= 90; i++) defaultSet.push(String.fromCharCode(i));
|
|
||||||
defaultSet.push(...' ,。!?;:""()【】《》…—·'.split(''));
|
|
||||||
|
|
||||||
const commonChinese = '的一是不了在人有的我他这个们中来就时大地为子中你说道生国年着就那和要她出也得里后自以会家可下而过天去能对小多然于心学么之都好看起发当没成只如事把还用第样道想作种开美总从无情已面最女但现前些所同日手又行意动方期它头经长儿回位分爱老因很给名法间斯知世什两次使身者被高已亲其进此话常与活正感';
|
|
||||||
for (const char of commonChinese) {
|
|
||||||
defaultSet.push(char);
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`📝 使用默认字符集,共 ${defaultSet.length} 个字符`);
|
|
||||||
return defaultSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
getCharacterSetSize() {
|
getCharacterSetSize() {
|
||||||
return this.characterSet.length;
|
return this.characterSet.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
async recognizeText(textRegionBuffer) {
|
async recognizeText(textRegionBuffer, regionIndex = 0) {
|
||||||
console.log('🔠 === 开始文本识别流程 ===');
|
const startTime = Date.now();
|
||||||
|
this.logger.info(`开始文本识别 - 区域 ${regionIndex}`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log('📥 1. 准备识别输入...');
|
const inputTensor = await this.prepareRecognitionInput(textRegionBuffer, regionIndex);
|
||||||
console.log(` - 输入图像大小: ${textRegionBuffer.length} 字节`);
|
|
||||||
|
|
||||||
const inputTensor = await this.prepareRecognitionInput(textRegionBuffer);
|
|
||||||
console.log('✅ 输入张量准备完成');
|
|
||||||
console.log(` - 张量形状: [${inputTensor.dims.join(', ')}]`);
|
|
||||||
console.log(` - 张量类型: ${inputTensor.type}`);
|
|
||||||
console.log(` - 数据长度: ${inputTensor.data.length}`);
|
|
||||||
|
|
||||||
// 数据验证
|
|
||||||
const tensorData = inputTensor.data;
|
|
||||||
let minVal = Infinity;
|
|
||||||
let maxVal = -Infinity;
|
|
||||||
let sumVal = 0;
|
|
||||||
let validCount = 0;
|
|
||||||
|
|
||||||
for (let i = 0; i < Math.min(100, tensorData.length); i++) {
|
|
||||||
const val = tensorData[i];
|
|
||||||
if (!isNaN(val) && isFinite(val)) {
|
|
||||||
minVal = Math.min(minVal, val);
|
|
||||||
maxVal = Math.max(maxVal, val);
|
|
||||||
sumVal += val;
|
|
||||||
validCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(` - 数据范围: ${minVal.toFixed(4)} ~ ${maxVal.toFixed(4)}`);
|
|
||||||
console.log(` - 数据均值: ${(sumVal / validCount).toFixed(4)}`);
|
|
||||||
|
|
||||||
console.log('🧠 2. 执行模型推理...');
|
|
||||||
const startInference = Date.now();
|
|
||||||
const outputs = await this.recSession.run({ [this.recSession.inputNames[0]]: inputTensor });
|
const outputs = await this.recSession.run({ [this.recSession.inputNames[0]]: inputTensor });
|
||||||
const inferenceTime = Date.now() - startInference;
|
|
||||||
console.log(`✅ 模型推理完成 (${inferenceTime}ms)`);
|
|
||||||
|
|
||||||
const outputNames = this.recSession.outputNames;
|
|
||||||
console.log(` - 输出数量: ${outputNames.length}`);
|
|
||||||
|
|
||||||
outputNames.forEach((name, index) => {
|
|
||||||
const output = outputs[name];
|
|
||||||
if (output) {
|
|
||||||
console.log(` - 输出 ${index + 1} (${name}): 形状 [${output.dims.join(', ')}]`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log('🔍 3. 后处理识别结果...');
|
|
||||||
const result = this.postprocessRecognition(outputs);
|
const result = this.postprocessRecognition(outputs);
|
||||||
console.log('✅ 后处理完成');
|
|
||||||
console.log(` - 识别文本: "${result.text}"`);
|
|
||||||
console.log(` - 置信度: ${result.confidence.toFixed(4)}`);
|
|
||||||
console.log(` - 文本长度: ${result.text.length} 字符`);
|
|
||||||
|
|
||||||
console.log('🎉 === 文本识别流程完成 ===');
|
const processingTime = Date.now() - startTime;
|
||||||
|
this.logger.info(`识别完成 - 区域 ${regionIndex}: "${result.text}", 置信度: ${result.confidence.toFixed(4)}, 耗时: ${processingTime}ms`);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('❌ 文本识别失败:');
|
this.logger.error(`文本识别失败 - 区域 ${regionIndex}`, error);
|
||||||
console.error(` - 错误信息: ${error.message}`);
|
|
||||||
return { text: '', confidence: 0 };
|
return { text: '', confidence: 0 };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async prepareRecognitionInput(textRegionBuffer) {
|
async prepareRecognitionInput(textRegionBuffer, regionIndex = 0) {
|
||||||
console.log(' 📝 准备识别输入详情:');
|
this.logger.debug(`准备识别输入 - 区域 ${regionIndex}`);
|
||||||
|
|
||||||
|
const targetHeight = 48;
|
||||||
|
const targetWidth = 320; // 原始目标宽度
|
||||||
|
const finalWidth = targetWidth + 20; // 最终宽度(左右各加10像素)
|
||||||
|
const timestamp = Date.now();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const targetHeight = 48;
|
|
||||||
const targetWidth = 320;
|
|
||||||
|
|
||||||
const metadata = await sharp(textRegionBuffer).metadata();
|
const metadata = await sharp(textRegionBuffer).metadata();
|
||||||
console.log(` - 原始图像尺寸: ${metadata.width}x${metadata.height}`);
|
this.logger.debug(`原始区域 ${regionIndex}: ${metadata.width}x${metadata.height}`);
|
||||||
|
|
||||||
// 保存原始图像用于调试
|
// 保存原始裁剪区域图像
|
||||||
const originalPath = path.join(this.debugDir, `original-${Date.now()}.png`);
|
const originalPath = path.join(this.preprocessedDir, `region-${regionIndex}-original-${timestamp}.png`);
|
||||||
await fse.writeFile(originalPath, textRegionBuffer);
|
await fse.writeFile(originalPath, textRegionBuffer);
|
||||||
|
this.logger.debug(`保存原始区域图像: ${originalPath}`);
|
||||||
|
|
||||||
// 关键修复:正确的预处理流程
|
// 图像分析
|
||||||
let processedBuffer = textRegionBuffer;
|
const stats = await sharp(textRegionBuffer).grayscale().stats();
|
||||||
|
|
||||||
// 1. 分析图像特性
|
|
||||||
const stats = await sharp(processedBuffer)
|
|
||||||
.grayscale()
|
|
||||||
.stats();
|
|
||||||
const meanBrightness = stats.channels[0].mean;
|
const meanBrightness = stats.channels[0].mean;
|
||||||
const stdDev = stats.channels[0].stdev;
|
const stdDev = stats.channels[0].stdev;
|
||||||
|
|
||||||
console.log(` - 图像统计: 均值=${meanBrightness.toFixed(1)}, 标准差=${stdDev.toFixed(1)}`);
|
this.logger.debug(`图像统计 - 区域 ${regionIndex}: 亮度=${meanBrightness.toFixed(1)}, 对比度=${stdDev.toFixed(1)}`);
|
||||||
|
|
||||||
|
// 智能预处理
|
||||||
|
let processedBuffer = await this.applySmartPreprocessing(textRegionBuffer, meanBrightness, stdDev, regionIndex);
|
||||||
|
|
||||||
|
// 保存预处理后的图像(灰度+对比度调整后)
|
||||||
|
const processedPath = path.join(this.preprocessedDir, `region-${regionIndex}-processed-${timestamp}.png`);
|
||||||
|
await fse.writeFile(processedPath, processedBuffer);
|
||||||
|
this.logger.debug(`保存预处理图像: ${processedPath}`);
|
||||||
|
|
||||||
|
// 保持宽高比的resize,并在左右添加10像素空白
|
||||||
|
const resizedBuffer = await this.resizeWithAspectRatio(processedBuffer, targetWidth, targetHeight, regionIndex);
|
||||||
|
|
||||||
|
// 保存调整大小后的图像
|
||||||
|
const resizedPath = path.join(this.preprocessedDir, `region-${regionIndex}-resized-${timestamp}.png`);
|
||||||
|
await fse.writeFile(resizedPath, resizedBuffer);
|
||||||
|
this.logger.debug(`保存调整大小图像: ${resizedPath}`);
|
||||||
|
|
||||||
|
// 使用最终尺寸创建张量
|
||||||
|
const inputData = await this.bufferToTensor(resizedBuffer, finalWidth, targetHeight);
|
||||||
|
this.logger.debug(`识别输入张量准备完成 - 区域 ${regionIndex}`);
|
||||||
|
|
||||||
|
// 创建张量时使用最终尺寸
|
||||||
|
return new Tensor('float32', inputData, [1, 3, targetHeight, finalWidth]);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error(`准备识别输入失败 - 区域 ${regionIndex}`, error);
|
||||||
|
return new Tensor('float32', new Float32Array(3 * targetHeight * finalWidth).fill(0.5), [1, 3, targetHeight, finalWidth]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async applySmartPreprocessing(buffer, meanBrightness, stdDev, regionIndex = 0) {
|
||||||
|
let processedBuffer = buffer;
|
||||||
|
|
||||||
// 2. 改进的预处理策略
|
|
||||||
if (meanBrightness > 200 && stdDev < 30) {
|
if (meanBrightness > 200 && stdDev < 30) {
|
||||||
console.log(' - 检测到高亮度图像,进行对比度增强');
|
this.logger.debug(`区域 ${regionIndex}: 应用高亮度图像增强`);
|
||||||
processedBuffer = await sharp(processedBuffer)
|
processedBuffer = await sharp(buffer)
|
||||||
.linear(1.5, -50)
|
.linear(1.5, -50)
|
||||||
.normalize()
|
.normalize()
|
||||||
.grayscale()
|
.grayscale()
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
} else if (meanBrightness < 80) {
|
} else if (meanBrightness < 80) {
|
||||||
console.log(' - 检测到低亮度图像,进行亮度调整');
|
this.logger.debug(`区域 ${regionIndex}: 应用低亮度图像增强`);
|
||||||
processedBuffer = await sharp(processedBuffer)
|
processedBuffer = await sharp(buffer)
|
||||||
.linear(1.2, 30)
|
.linear(1.2, 30)
|
||||||
.normalize()
|
.normalize()
|
||||||
.grayscale()
|
.grayscale()
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
|
} else if (stdDev < 20) {
|
||||||
|
this.logger.debug(`区域 ${regionIndex}: 应用低对比度增强`);
|
||||||
|
processedBuffer = await sharp(buffer)
|
||||||
|
.linear(1.3, -20)
|
||||||
|
.normalize()
|
||||||
|
.grayscale()
|
||||||
|
.toBuffer();
|
||||||
} else {
|
} else {
|
||||||
console.log(' - 使用标准化灰度处理');
|
this.logger.debug(`区域 ${regionIndex}: 应用标准化灰度处理`);
|
||||||
processedBuffer = await sharp(processedBuffer)
|
processedBuffer = await sharp(buffer)
|
||||||
.normalize()
|
.normalize()
|
||||||
.grayscale()
|
.grayscale()
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. 保持宽高比的resize
|
return processedBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
async resizeWithAspectRatio(buffer, targetWidth, targetHeight, regionIndex = 0) {
|
||||||
|
const metadata = await sharp(buffer).metadata();
|
||||||
const originalAspectRatio = metadata.width / metadata.height;
|
const originalAspectRatio = metadata.width / metadata.height;
|
||||||
const targetAspectRatio = targetWidth / targetHeight;
|
const targetAspectRatio = targetWidth / targetHeight;
|
||||||
|
|
||||||
let resizeWidth, resizeHeight;
|
let resizeWidth, resizeHeight;
|
||||||
|
|
||||||
if (originalAspectRatio > targetAspectRatio) {
|
if (originalAspectRatio > targetAspectRatio) {
|
||||||
// 宽度限制
|
// 宽度限制,按宽度缩放
|
||||||
resizeWidth = targetWidth;
|
resizeWidth = targetWidth;
|
||||||
resizeHeight = Math.round(targetWidth / originalAspectRatio);
|
resizeHeight = Math.round(targetWidth / originalAspectRatio);
|
||||||
} else {
|
} else {
|
||||||
// 高度限制
|
// 高度限制,按高度缩放
|
||||||
resizeHeight = targetHeight;
|
resizeHeight = targetHeight;
|
||||||
resizeWidth = Math.round(targetHeight * originalAspectRatio);
|
resizeWidth = Math.round(targetHeight * originalAspectRatio);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 确保尺寸有效
|
|
||||||
resizeWidth = Math.max(1, Math.min(resizeWidth, targetWidth));
|
resizeWidth = Math.max(1, Math.min(resizeWidth, targetWidth));
|
||||||
resizeHeight = Math.max(1, Math.min(resizeHeight, targetHeight));
|
resizeHeight = Math.max(1, Math.min(resizeHeight, targetHeight));
|
||||||
|
|
||||||
processedBuffer = await sharp(processedBuffer)
|
this.logger.debug(`区域 ${regionIndex}: 调整尺寸 ${metadata.width}x${metadata.height} -> ${resizeWidth}x${resizeHeight}`);
|
||||||
|
|
||||||
|
// 计算居中的偏移量
|
||||||
|
const offsetX = Math.floor((targetWidth - resizeWidth) / 2);
|
||||||
|
const offsetY = Math.floor((targetHeight - resizeHeight) / 2);
|
||||||
|
|
||||||
|
this.logger.debug(`区域 ${regionIndex}: 居中偏移 X=${offsetX}, Y=${offsetY}`);
|
||||||
|
|
||||||
|
// 先调整大小并居中
|
||||||
|
let resizedBuffer = await sharp(buffer)
|
||||||
.resize(resizeWidth, resizeHeight, {
|
.resize(resizeWidth, resizeHeight, {
|
||||||
fit: 'contain',
|
fit: 'contain',
|
||||||
background: { r: 255, g: 255, b: 255 }
|
background: { r: 255, g: 255, b: 255 }
|
||||||
})
|
})
|
||||||
.extend({
|
.extend({
|
||||||
top: 0,
|
top: offsetY,
|
||||||
bottom: targetHeight - resizeHeight,
|
bottom: targetHeight - resizeHeight - offsetY,
|
||||||
left: 0,
|
left: offsetX,
|
||||||
right: targetWidth - resizeWidth,
|
right: targetWidth - resizeWidth - offsetX,
|
||||||
background: { r: 255, g: 255, b: 255 }
|
background: { r: 255, g: 255, b: 255 }
|
||||||
})
|
})
|
||||||
.png()
|
.png()
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
|
|
||||||
const processedMetadata = await sharp(processedBuffer).metadata();
|
// 在左右各添加10像素空白
|
||||||
console.log(` - 处理后尺寸: ${processedMetadata.width}x${processedMetadata.height}`);
|
const finalWidth = targetWidth + 20; // 左右各加10像素
|
||||||
|
const finalHeight = targetHeight;
|
||||||
|
|
||||||
// 保存预处理后的图像用于调试
|
resizedBuffer = await sharp(resizedBuffer)
|
||||||
const processedPath = path.join(this.debugDir, `processed-${Date.now()}.png`);
|
.extend({
|
||||||
await fse.writeFile(processedPath, processedBuffer);
|
top: 0,
|
||||||
|
bottom: 0,
|
||||||
|
left: 10,
|
||||||
|
right: 10,
|
||||||
|
background: { r: 255, g: 255, b: 255 }
|
||||||
|
})
|
||||||
|
.png()
|
||||||
|
.toBuffer();
|
||||||
|
|
||||||
// 4. 转换为张量 - 关键修复:正确的归一化
|
this.logger.debug(`区域 ${regionIndex}: 最终尺寸 ${finalWidth}x${finalHeight} (左右各加10像素空白)`);
|
||||||
console.log(' - 转换为张量数据...');
|
|
||||||
const imageData = await sharp(processedBuffer)
|
return resizedBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
async bufferToTensor(buffer, width, height) {
|
||||||
|
// 获取实际图像尺寸(因为现在宽度增加了20像素)
|
||||||
|
const metadata = await sharp(buffer).metadata();
|
||||||
|
const actualWidth = metadata.width;
|
||||||
|
const actualHeight = metadata.height;
|
||||||
|
|
||||||
|
const imageData = await sharp(buffer)
|
||||||
.ensureAlpha()
|
.ensureAlpha()
|
||||||
.raw()
|
.raw()
|
||||||
.toBuffer({ resolveWithObject: true });
|
.toBuffer({ resolveWithObject: true });
|
||||||
|
|
||||||
const inputData = new Float32Array(3 * targetHeight * targetWidth);
|
// 使用实际尺寸创建张量
|
||||||
|
const inputData = new Float32Array(3 * actualHeight * actualWidth);
|
||||||
const data = imageData.data;
|
const data = imageData.data;
|
||||||
const channels = imageData.info.channels;
|
|
||||||
|
|
||||||
// 使用正确的归一化方法
|
for (let i = 0; i < data.length; i += 4) {
|
||||||
for (let i = 0; i < data.length; i += channels) {
|
const pixelIndex = Math.floor(i / 4);
|
||||||
const pixelIndex = Math.floor(i / channels);
|
const y = Math.floor(pixelIndex / actualWidth);
|
||||||
const y = Math.floor(pixelIndex / targetWidth);
|
const x = pixelIndex % actualWidth;
|
||||||
const x = pixelIndex % targetWidth;
|
|
||||||
|
|
||||||
// 对每个位置,三个通道使用相同的灰度值
|
// 使用灰度值填充三个通道
|
||||||
const grayValue = data[i] / 255.0;
|
const grayValue = data[i] / 255.0;
|
||||||
|
|
||||||
for (let c = 0; c < 3; c++) {
|
for (let c = 0; c < 3; c++) {
|
||||||
const inputIndex = c * targetHeight * targetWidth + y * targetWidth + x;
|
const inputIndex = c * actualHeight * actualWidth + y * actualWidth + x;
|
||||||
if (inputIndex < inputData.length) {
|
if (inputIndex < inputData.length) {
|
||||||
inputData[inputIndex] = grayValue;
|
inputData[inputIndex] = grayValue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(` - 输入数据长度: ${inputData.length}`);
|
return inputData;
|
||||||
|
|
||||||
// 数据验证
|
|
||||||
let validCount = 0;
|
|
||||||
let sumValue = 0;
|
|
||||||
let minValue = Infinity;
|
|
||||||
let maxValue = -Infinity;
|
|
||||||
|
|
||||||
for (let i = 0; i < Math.min(100, inputData.length); i++) {
|
|
||||||
const val = inputData[i];
|
|
||||||
if (!isNaN(val) && isFinite(val)) {
|
|
||||||
validCount++;
|
|
||||||
sumValue += val;
|
|
||||||
minValue = Math.min(minValue, val);
|
|
||||||
maxValue = Math.max(maxValue, val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(` - 数据验证: 有效=${validCount}`);
|
|
||||||
console.log(` - 数据范围: ${minValue.toFixed(4)} ~ ${maxValue.toFixed(4)}`);
|
|
||||||
console.log(` - 数据均值: ${(sumValue / validCount).toFixed(4)}`);
|
|
||||||
|
|
||||||
return new Tensor('float32', inputData, [1, 3, targetHeight, targetWidth]);
|
|
||||||
|
|
||||||
} catch (error) {
|
|
||||||
console.error(` ❌ 准备输入失败: ${error.message}`);
|
|
||||||
// 返回有效的默认张量
|
|
||||||
return new Tensor('float32', new Float32Array(3 * 48 * 320).fill(0.5), [1, 3, 48, 320]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
postprocessRecognition(outputs) {
|
postprocessRecognition(outputs) {
|
||||||
console.log(' 📝 后处理识别结果详情:');
|
this.logger.debug('开始识别后处理');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const outputNames = this.recSession.outputNames;
|
const outputNames = this.recSession.outputNames;
|
||||||
const recognitionOutput = outputs[outputNames[0]];
|
const recognitionOutput = outputs[outputNames[0]];
|
||||||
|
|
||||||
if (!recognitionOutput) {
|
if (!recognitionOutput) {
|
||||||
console.log(' ❌ 识别输出为空');
|
this.logger.debug('识别输出为空');
|
||||||
return { text: '', confidence: 0 };
|
return { text: '', confidence: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = recognitionOutput.data;
|
const data = recognitionOutput.data;
|
||||||
const [batch, seqLen, vocabSize] = recognitionOutput.dims;
|
const [batch, seqLen, vocabSize] = recognitionOutput.dims;
|
||||||
|
|
||||||
console.log(` - 序列长度: ${seqLen}, 词汇表大小: ${vocabSize}`);
|
this.logger.debug(`序列长度: ${seqLen}, 词汇表大小: ${vocabSize}, 字符集大小: ${this.characterSet.length}`);
|
||||||
console.log(` - 输出数据总数: ${data.length}`);
|
|
||||||
console.log(` - 字符集大小: ${this.characterSet.length}`);
|
|
||||||
|
|
||||||
if (this.characterSet.length === 0) {
|
if (this.characterSet.length === 0) {
|
||||||
console.log(' ❌ 字符集为空');
|
this.logger.error('字符集为空');
|
||||||
return { text: '', confidence: 0 };
|
return { text: '', confidence: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
// 改进的CTC解码算法
|
// 验证词汇表大小与字符集大小的匹配
|
||||||
|
if (vocabSize !== this.characterSet.length + 1) {
|
||||||
|
this.logger.warn(`词汇表大小(${vocabSize})与字符集大小(${this.characterSet.length})不匹配,可能影响识别效果`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const { text, confidence } = this.ctcDecode(data, seqLen, vocabSize);
|
||||||
|
this.logger.debug(`解码结果: "${text}", 置信度: ${confidence.toFixed(4)}`);
|
||||||
|
|
||||||
|
return { text, confidence };
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('识别后处理失败', error);
|
||||||
|
return { text: '', confidence: 0 };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ctcDecode(data, seqLen, vocabSize) {
|
||||||
let text = '';
|
let text = '';
|
||||||
let lastCharIndex = -1;
|
let lastCharIndex = -1;
|
||||||
let confidenceSum = 0;
|
let confidenceSum = 0;
|
||||||
let charCount = 0;
|
let charCount = 0;
|
||||||
|
|
||||||
// 降低置信度阈值,提高召回率
|
// 动态阈值调整
|
||||||
const confidenceThreshold = 0.05;
|
const baseThreshold = 0.03;
|
||||||
|
let confidenceThreshold = baseThreshold;
|
||||||
|
|
||||||
|
// 先分析整个序列的置信度分布
|
||||||
|
let maxSequenceProb = 0;
|
||||||
|
for (let t = 0; t < seqLen; t++) {
|
||||||
|
for (let i = 0; i < vocabSize; i++) {
|
||||||
|
maxSequenceProb = Math.max(maxSequenceProb, data[t * vocabSize + i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果整体置信度较低,降低阈值
|
||||||
|
if (maxSequenceProb < 0.5) {
|
||||||
|
confidenceThreshold = baseThreshold * 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.logger.debug(`使用解码阈值: ${confidenceThreshold.toFixed(4)}`);
|
||||||
|
|
||||||
console.log(' - 处理每个时间步:');
|
|
||||||
for (let t = 0; t < seqLen; t++) {
|
for (let t = 0; t < seqLen; t++) {
|
||||||
let maxProb = -1;
|
let maxProb = -1;
|
||||||
let maxIndex = -1;
|
let maxIndex = -1;
|
||||||
@ -330,40 +374,59 @@ class TextRecognizer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 改进的解码逻辑
|
// 改进的CTC解码逻辑
|
||||||
if (maxIndex > 0 && maxProb > confidenceThreshold) {
|
if (maxIndex > 0 && maxProb > confidenceThreshold) {
|
||||||
const char = this.characterSet[maxIndex - 1] || '';
|
const charIndex = maxIndex - 1;
|
||||||
|
if (charIndex < this.characterSet.length) {
|
||||||
|
const char = this.characterSet[charIndex];
|
||||||
|
|
||||||
// 放宽重复字符限制
|
// 更智能的重复字符处理
|
||||||
if (maxIndex !== lastCharIndex || maxProb > 0.8) {
|
const shouldAddChar = maxIndex !== lastCharIndex ||
|
||||||
if (char && char.trim() !== '') {
|
maxProb > 0.8 ||
|
||||||
|
(maxIndex === lastCharIndex && charCount > 0 && text[text.length - 1] !== char);
|
||||||
|
|
||||||
|
if (shouldAddChar && char && char.trim() !== '') {
|
||||||
text += char;
|
text += char;
|
||||||
confidenceSum += maxProb;
|
confidenceSum += maxProb;
|
||||||
charCount++;
|
charCount++;
|
||||||
console.log(` [位置 ${t}] 字符: "${char}", 置信度: ${maxProb.toFixed(4)}`);
|
|
||||||
}
|
}
|
||||||
lastCharIndex = maxIndex;
|
lastCharIndex = maxIndex;
|
||||||
|
} else {
|
||||||
|
this.logger.warn(`字符索引${charIndex}超出字符集范围(0-${this.characterSet.length-1})`);
|
||||||
}
|
}
|
||||||
} else if (maxIndex === 0) {
|
} else if (maxIndex === 0) {
|
||||||
// 空白符,重置lastCharIndex
|
|
||||||
lastCharIndex = -1;
|
lastCharIndex = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const avgConfidence = charCount > 0 ? confidenceSum / charCount : 0;
|
const avgConfidence = charCount > 0 ? confidenceSum / charCount : 0;
|
||||||
|
|
||||||
console.log(` - 识别结果: "${text}"`);
|
// 基本的文本清理(不包含错误模式修复)
|
||||||
console.log(` - 字符数: ${charCount}, 平均置信度: ${avgConfidence.toFixed(4)}`);
|
const cleanedText = this.basicTextCleaning(text);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
text: text,
|
text: cleanedText,
|
||||||
confidence: avgConfidence
|
confidence: avgConfidence
|
||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
|
||||||
console.error(` ❌ 后处理失败: ${error.message}`);
|
|
||||||
return { text: '', confidence: 0 };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
basicTextCleaning(text) {
|
||||||
|
if (!text) return '';
|
||||||
|
|
||||||
|
let cleaned = text;
|
||||||
|
|
||||||
|
// 1. 移除过多的重复字符(保留合理的重复)
|
||||||
|
cleaned = cleaned.replace(/([^0-9])\1{2,}/g, '$1$1');
|
||||||
|
|
||||||
|
// 2. 修复标点符号
|
||||||
|
cleaned = cleaned.replace(/∶/g, ':')
|
||||||
|
.replace(/《/g, '(')
|
||||||
|
.replace(/》/g, ')');
|
||||||
|
|
||||||
|
// 3. 修复数字和百分号
|
||||||
|
cleaned = cleaned.replace(/(\d+)%%/g, '$1%');
|
||||||
|
|
||||||
|
return cleaned.trim();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,17 +1,28 @@
|
|||||||
// server/utils/textRegionCropper.js
|
// server/utils/textRegionCropper.js
|
||||||
import sharp from 'sharp';
|
import sharp from 'sharp';
|
||||||
|
import fse from 'fs-extra';
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
class TextRegionCropper {
|
class TextRegionCropper {
|
||||||
constructor() {
|
constructor() {
|
||||||
// 可以在这里添加配置参数
|
this.logger = {
|
||||||
|
info: (msg, ...args) => console.log(`✂️ [裁剪] ${msg}`, ...args),
|
||||||
|
debug: (msg, ...args) => console.log(`🐛 [裁剪] ${msg}`, ...args),
|
||||||
|
error: (msg, ...args) => console.error(`❌ [裁剪] ${msg}`, ...args)
|
||||||
|
};
|
||||||
|
// 确保裁剪调试目录存在
|
||||||
|
this.cropDebugDir = path.join(process.cwd(), 'temp', 'crop_debug');
|
||||||
|
fse.ensureDirSync(this.cropDebugDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
async cropTextRegion(imageBuffer, box, regionIndex) {
|
async cropTextRegion(imageBuffer, box, regionIndex) {
|
||||||
|
const timestamp = Date.now();
|
||||||
try {
|
try {
|
||||||
const metadata = await sharp(imageBuffer).metadata();
|
const metadata = await sharp(imageBuffer).metadata();
|
||||||
const imgWidth = metadata.width;
|
const imgWidth = metadata.width;
|
||||||
const imgHeight = metadata.height;
|
const imgHeight = metadata.height;
|
||||||
|
|
||||||
|
// 计算文本框的边界
|
||||||
const left = Math.min(box.x1, box.x2, box.x3, box.x4);
|
const left = Math.min(box.x1, box.x2, box.x3, box.x4);
|
||||||
const top = Math.min(box.y1, box.y2, box.y3, box.y4);
|
const top = Math.min(box.y1, box.y2, box.y3, box.y4);
|
||||||
const right = Math.max(box.x1, box.x2, box.x3, box.x4);
|
const right = Math.max(box.x1, box.x2, box.x3, box.x4);
|
||||||
@ -20,78 +31,54 @@ class TextRegionCropper {
|
|||||||
const originalWidth = right - left;
|
const originalWidth = right - left;
|
||||||
const originalHeight = bottom - top;
|
const originalHeight = bottom - top;
|
||||||
|
|
||||||
// 减少扩展,避免引入过多背景
|
// 四边各扩大5像素
|
||||||
const widthExpand = 10;
|
const expandPixels = 5;
|
||||||
const heightExpand = 10;
|
|
||||||
|
|
||||||
const newWidth = originalWidth + widthExpand;
|
const expandedLeft = Math.max(0, left - expandPixels);
|
||||||
const newHeight = originalHeight + heightExpand;
|
const expandedTop = Math.max(0, top - expandPixels);
|
||||||
|
const expandedRight = Math.min(imgWidth - 1, right + expandPixels);
|
||||||
|
const expandedBottom = Math.min(imgHeight - 1, bottom + expandPixels);
|
||||||
|
|
||||||
const centerX = (left + right) / 2;
|
const expandedWidth = expandedRight - expandedLeft;
|
||||||
const centerY = (top + bottom) / 2;
|
const expandedHeight = expandedBottom - expandedTop;
|
||||||
|
|
||||||
const expandedLeft = Math.max(0, centerX - newWidth / 2);
|
if (expandedWidth <= 0 || expandedHeight <= 0) {
|
||||||
const expandedTop = Math.max(0, centerY - newHeight / 2);
|
this.logger.debug(`区域 ${regionIndex}: 无效的裁剪区域`);
|
||||||
const expandedRight = Math.min(imgWidth - 1, centerX + newWidth / 2);
|
|
||||||
const expandedBottom = Math.min(imgHeight - 1, centerY + newHeight / 2);
|
|
||||||
|
|
||||||
const finalWidth = expandedRight - expandedLeft;
|
|
||||||
const finalHeight = expandedBottom - expandedTop;
|
|
||||||
|
|
||||||
if (finalWidth <= 0 || finalHeight <= 0) {
|
|
||||||
console.log(`❌ 区域 ${regionIndex}: 无效的裁剪区域`);
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
let adjustedLeft = expandedLeft;
|
|
||||||
let adjustedTop = expandedTop;
|
|
||||||
let adjustedWidth = finalWidth;
|
|
||||||
let adjustedHeight = finalHeight;
|
|
||||||
|
|
||||||
if (expandedLeft < 0) {
|
|
||||||
adjustedLeft = 0;
|
|
||||||
adjustedWidth = expandedRight;
|
|
||||||
}
|
|
||||||
if (expandedTop < 0) {
|
|
||||||
adjustedTop = 0;
|
|
||||||
adjustedHeight = expandedBottom;
|
|
||||||
}
|
|
||||||
if (expandedRight > imgWidth) {
|
|
||||||
adjustedWidth = imgWidth - adjustedLeft;
|
|
||||||
}
|
|
||||||
if (expandedBottom > imgHeight) {
|
|
||||||
adjustedHeight = imgHeight - adjustedTop;
|
|
||||||
}
|
|
||||||
|
|
||||||
const croppedBuffer = await sharp(imageBuffer)
|
const croppedBuffer = await sharp(imageBuffer)
|
||||||
.extract({
|
.extract({
|
||||||
left: Math.floor(adjustedLeft),
|
left: Math.floor(expandedLeft),
|
||||||
top: Math.floor(adjustedTop),
|
top: Math.floor(expandedTop),
|
||||||
width: Math.floor(adjustedWidth),
|
width: Math.floor(expandedWidth),
|
||||||
height: Math.floor(adjustedHeight)
|
height: Math.floor(expandedHeight)
|
||||||
})
|
})
|
||||||
.png()
|
.png()
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
|
|
||||||
console.log(`✂️ 区域 ${regionIndex}: 裁剪 ${Math.floor(adjustedWidth)}x${Math.floor(adjustedHeight)}`);
|
// 保存裁剪后的图像用于调试
|
||||||
|
const cropPath = path.join(this.cropDebugDir, `crop-${regionIndex}-${timestamp}.png`);
|
||||||
|
await fse.writeFile(cropPath, croppedBuffer);
|
||||||
|
this.logger.debug(`区域 ${regionIndex}: 裁剪 ${Math.floor(expandedWidth)}x${Math.floor(expandedHeight)} -> ${cropPath}`);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: croppedBuffer,
|
buffer: croppedBuffer,
|
||||||
boxInfo: {
|
boxInfo: {
|
||||||
original: { left, top, right, bottom, width: originalWidth, height: originalHeight },
|
original: { left, top, right, bottom, width: originalWidth, height: originalHeight },
|
||||||
expanded: {
|
expanded: {
|
||||||
left: adjustedLeft,
|
left: expandedLeft,
|
||||||
top: adjustedTop,
|
top: expandedTop,
|
||||||
right: adjustedLeft + adjustedWidth,
|
right: expandedRight,
|
||||||
bottom: adjustedTop + adjustedHeight,
|
bottom: expandedBottom,
|
||||||
width: adjustedWidth,
|
width: expandedWidth,
|
||||||
height: adjustedHeight
|
height: expandedHeight
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`❌ 区域 ${regionIndex}: 裁剪失败`, error);
|
this.logger.error(`区域 ${regionIndex}: 裁剪失败`, error);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
正在加载...
在新工单中引用
屏蔽一个用户