// 487 lines · 14 KiB · TypeScript
import express from 'express';
|
|
import cors from 'cors';
|
|
import multer from 'multer';
|
|
import path from 'path';
|
|
import fs from 'fs-extra';
|
|
import { calculateFileMD5 } from './utils.js';
|
|
import { initDatabase, FileService } from '../database/database.js';
|
|
import onnxOcrManager from "./utils/onnxOcrManager.js";
|
|
|
|
import sharp from "sharp";
|
|
import fse from "fs-extra";
|
|
|
|
|
|
const app = express();
const PORT = 3000;

// Initialise the database before the file service is constructed.
initDatabase();
const fileService = new FileService();

// Working directories, all resolved against the current working directory.
const uploadDir = path.join(process.cwd(), 'uploads');
const tempDir = path.join(process.cwd(), 'temp');
const processedDir = path.join(process.cwd(), 'processed');

// Create the upload, temp and processed directories up front if they are missing.
for (const dir of [uploadDir, tempDir, processedDir]) {
  fs.ensureDirSync(dir);
}
|
|
|
|
// Multer disk storage: every upload is written to `uploadDir` under a
// sanitised, collision-resistant file name.
const storage = multer.diskStorage({
  destination: (_req, _file, cb) => {
    cb(null, uploadDir);
  },
  filename: (_req, file, cb) => {
    // `file.originalname` has already been converted from latin1 to UTF-8 by the
    // `fileFilter` configured on the multer instance, which multer invokes before
    // this callback. Decoding a second time would mangle non-ASCII (e.g. Chinese)
    // names, so the value is used as-is.
    const originalName = file.originalname;
    const ext = path.extname(originalName);
    const name = path.basename(originalName, ext);

    // Keep ASCII letters, digits and common CJK ideographs; replace everything else.
    const safeName = name.replace(/[^a-zA-Z0-9\u4e00-\u9fa5]/g, '_');

    // Timestamp plus a random component so identical names never collide.
    const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() * 1e9);

    cb(null, safeName + '-' + uniqueSuffix + ext);
  }
});
|
|
|
|
// Multer instance used by the upload route. Every file is accepted; the filter
// only exists to rewrite the client-supplied name from latin1 to UTF-8.
const upload = multer({
  storage,
  fileFilter: (_req, incoming, done) => {
    const utf8Name = Buffer.from(incoming.originalname, 'latin1').toString('utf8');
    incoming.originalname = utf8Name;
    done(null, true);
  }
});
|
|
|
|
// Default every response to JSON with an explicit UTF-8 charset.
// NOTE(review): routes that stream files appear to need their own Content-Type,
// since this header is already set when they run — confirm against res.sendFile.
app.use((_req, res, next) => {
  res.setHeader('Content-Type', 'application/json; charset=utf-8');
  next();
});

// Cross-origin access, then JSON and URL-encoded body parsing (up to 50 MB each).
const bodyLimit = '50mb';
app.use(
  cors(),
  express.json({ limit: bodyLimit }),
  express.urlencoded({ extended: true, limit: bodyLimit })
);
|
|
|
|
// File upload endpoint.
// Stores the multipart field `file`, computes its MD5 and records it in the database.
// Responds 400 when no file is attached and 500 when hashing or persistence fails.
app.post('/api/upload', upload.single('file'), async (req, res) => {
  const uploaded = req.file;
  if (!uploaded) {
    return res.status(400).json({ error: 'No file uploaded' });
  }

  try {
    const md5 = await calculateFileMD5(uploaded.path);

    const fileRecord = await fileService.createFile({
      // Already UTF-8: the multer fileFilter decoded it once. Decoding again here
      // would corrupt non-ASCII names before they reach the database.
      originalName: uploaded.originalname,
      fileName: uploaded.filename,
      filePath: uploaded.path,
      fileSize: uploaded.size,
      mimeType: uploaded.mimetype,
      md5
    });

    res.json({
      success: true,
      data: fileRecord
    });
  } catch (error) {
    console.error('Upload error:', error);

    // Best-effort cleanup so a failed upload does not leave an untracked file on disk.
    await fs.remove(uploaded.path).catch((cleanupError) => {
      console.error('Failed to remove orphaned upload:', cleanupError);
    });

    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: 'Upload failed: ' + message });
  }
});
|
|
|
|
// File list endpoint (paginated).
// Query parameters: `page` (default 1) and `pageSize` (default 100). Missing,
// non-numeric, zero or negative values fall back to the defaults.
app.get('/api/files', async (req, res) => {
  try {
    const requestedPage = parseInt(req.query.page as string, 10);
    const requestedPageSize = parseInt(req.query.pageSize as string, 10);

    // `NaN > 0` is false, so one comparison covers every invalid input.
    const page = requestedPage > 0 ? requestedPage : 1;
    const pageSize = requestedPageSize > 0 ? requestedPageSize : 100;

    const result = await fileService.getFilesPaginated(page, pageSize);

    // Uniform response envelope: the file array itself plus pagination metadata.
    res.json({
      success: true,
      data: result.files,
      pagination: result.pagination
    });
  } catch (error) {
    console.error('Get files error:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({
      success: false,
      error: 'Failed to get files: ' + message
    });
  }
});
|
|
|
|
// MD5 check endpoint: re-hashes the stored file and reports whether the hash
// differs from the one recorded in the database.
app.post('/api/files/:id/check-md5', async (req, res) => {
  try {
    const record = await fileService.getFileById(parseInt(req.params.id));
    if (!record) {
      return res.status(404).json({ error: 'File not found' });
    }

    const currentMD5 = await calculateFileMD5(record.filePath);
    const originalMD5 = record.md5;

    res.json({
      isChanged: currentMD5 !== originalMD5,
      currentMD5,
      originalMD5,
      file: record
    });
  } catch (err) {
    console.error('MD5 check error:', err);
    res.status(500).json({ error: 'MD5 check failed' });
  }
});
|
|
|
|
// MD5 update endpoint: overwrites the stored hash of a file with the `md5`
// value from the request body.
// Responds 400 when the id is not numeric or `md5` is not a non-empty string.
app.put('/api/files/:id/update-md5', async (req, res) => {
  try {
    const fileId = parseInt(req.params.id, 10);
    if (Number.isNaN(fileId)) {
      return res.status(400).json({ error: 'Invalid file ID' });
    }

    // The body may be absent when no parser matched the request.
    const { md5 } = req.body ?? {};
    if (typeof md5 !== 'string' || md5.trim() === '') {
      return res.status(400).json({ error: 'MD5 is required' });
    }

    await fileService.updateFileMD5(fileId, md5);
    res.json({ success: true });
  } catch (error) {
    console.error('Update MD5 error:', error);
    res.status(500).json({ error: 'Update failed' });
  }
});
|
|
|
|
// OCR recognition endpoint, backed by the ONNX OCR manager.
// Body: `fileId` (required) and an optional `config` forwarded to the recogniser.
// Responds 400 without a file id, 404 for an unknown file, 500 on recognition failure.
app.post('/api/ocr/recognize', async (req, res) => {
  try {
    const { fileId, config } = req.body;

    if (!fileId) {
      return res.status(400).json({ error: 'File ID is required' });
    }

    const file = await fileService.getFileById(parseInt(fileId, 10));
    if (!file) {
      return res.status(404).json({ error: 'File not found' });
    }

    console.log(`开始ONNX OCR识别: ${file.originalName}`);

    const result = await onnxOcrManager.recognizeImage(file.filePath, config);

    res.json({
      success: true,
      data: {
        textBlocks: result.textBlocks,
        totalPages: result.totalPages,
        processingTime: result.processingTime,
        confidence: result.confidence,
        // The ONNX pipeline does not expose a processed image yet.
        processedImageUrl: '',
        imageInfo: result.imageInfo,
        isOffline: result.isOffline
      }
    });
  } catch (error) {
    console.error('ONNX OCR识别失败:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: 'OCR识别失败: ' + message });
  }
});
|
|
/**
 * Writes a grayscale, normalised, sharpened PNG copy of an image into the
 * processed directory as `processed-<fileId>.png`.
 *
 * @param fileId - id used to build the output file name and the returned URL
 * @param processedImagePath - path of the image handed to sharp
 * @returns the API URL of the saved image, or an empty string when saving fails
 */
async function saveProcessedImage(fileId: number, processedImagePath: string): Promise<string> {
  try {
    const outputFile = path.join(processedDir, `processed-${fileId}.png`);

    const pipeline = sharp(processedImagePath).grayscale().normalize().sharpen().png();
    await pipeline.toFile(outputFile);

    return `/api/files/${fileId}/processed-image`;
  } catch (err) {
    // Failure is logged and reported to the caller as an empty URL.
    console.error('保存处理后的图片失败:', err);
    return '';
  }
}
|
|
|
|
// Serves the processed PNG for a file id, or 404 when the file
// `processed-<id>.png` does not exist in the processed directory.
app.get('/api/files/:id/processed-image', async (req, res) => {
  try {
    const id = parseInt(req.params.id);
    const imageFile = path.join(processedDir, `processed-${id}.png`);

    if (fs.existsSync(imageFile)) {
      // Replace the JSON content type set by the global middleware.
      res.setHeader('Content-Type', 'image/png');
      res.sendFile(path.resolve(imageFile));
    } else {
      res.status(404).json({ error: 'Processed image not found' });
    }
  } catch (err) {
    console.error('Get processed image error:', err);
    res.status(500).json({ error: 'Failed to get processed image' });
  }
});
|
|
|
|
// Persists an OCR result for a file.
// Body: `fileId` and `ocrData`, both required (400 otherwise).
app.post('/api/ocr/save-result', async (req, res) => {
  try {
    const { fileId, ocrData } = req.body;

    if (!fileId || !ocrData) {
      return res.status(400).json({ error: '文件ID和OCR数据是必需的' });
    }

    await fileService.saveOcrResult(parseInt(fileId, 10), ocrData);

    res.json({ success: true });
  } catch (error) {
    console.error('保存OCR结果失败:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: '保存OCR结果失败: ' + message });
  }
});
|
|
|
|
// Returns the stored OCR result for a file.
// A missing result is reported as `success: false` in a normal (non-error)
// response, which is the contract existing clients see.
app.get('/api/ocr/result/:fileId', async (req, res) => {
  try {
    const fileId = parseInt(req.params.fileId, 10);
    const result = await fileService.getOcrResult(fileId);

    if (!result) {
      return res.json({
        success: false,
        error: '未找到OCR结果'
      });
    }

    res.json({
      success: true,
      data: result.ocr_data
    });
  } catch (error) {
    console.error('获取OCR结果失败:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: '获取OCR结果失败: ' + message });
  }
});
|
|
|
|
// Updates OCR text after manual correction.
// Body: `fileId` and `textBlocks`, both required (400 otherwise).
app.put('/api/ocr/update-text', async (req, res) => {
  try {
    const { fileId, textBlocks } = req.body;

    if (!fileId || !textBlocks) {
      return res.status(400).json({ error: '文件ID和文本数据是必需的' });
    }

    await fileService.updateOcrText(parseInt(fileId, 10), textBlocks);

    res.json({ success: true });
  } catch (error) {
    console.error('更新OCR文本失败:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: '更新OCR文本失败: ' + message });
  }
});
|
|
|
|
// File preview endpoint: streams the stored file using its recorded MIME type.
// Responds 404 when the record is unknown or the file is missing on disk.
app.get('/api/files/:id/preview', async (req, res) => {
  try {
    const record = await fileService.getFileById(parseInt(req.params.id));
    if (!record) {
      return res.status(404).json({ error: 'File not found' });
    }

    // The database row can outlive the file itself, so check the disk as well.
    const onDisk = fs.existsSync(record.filePath);
    if (!onDisk) {
      return res.status(404).json({ error: 'File not found on disk' });
    }

    // Replace the JSON content type set by the global middleware, then send the file.
    res.setHeader('Content-Type', record.mimeType);
    res.sendFile(path.resolve(record.filePath));
  } catch (err) {
    console.error('File preview error:', err);
    res.status(500).json({ error: 'Failed to get file preview' });
  }
});
|
|
// Batch OCR endpoint.
// Body: `fileIds` (array, required) and an optional `config` forwarded to the recogniser.
// Ids that do not resolve to a stored file are skipped, so the result list can be
// shorter than `fileIds`.
app.post('/api/ocr/batch-recognize', async (req, res) => {
  try {
    const { fileIds, config } = req.body;

    if (!fileIds || !Array.isArray(fileIds)) {
      return res.status(400).json({ error: 'File IDs array is required' });
    }

    // Resolve ids to file paths one at a time, preserving request order.
    const filePaths = [];
    for (const fileId of fileIds) {
      const file = await fileService.getFileById(parseInt(fileId, 10));
      if (file) {
        filePaths.push(file.filePath);
      }
    }

    const results = await onnxOcrManager.batchRecognize(filePaths, config);

    res.json({
      success: true,
      data: results
    });
  } catch (error) {
    console.error('批量ONNX OCR识别失败:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: '批量识别失败: ' + message });
  }
});
|
|
// Serves a pre-processed OCR image identified by the `path` query parameter.
// The path comes from the client, so it is only honoured when it resolves to a
// file inside one of this server's own working directories; anything else is
// rejected with 403 instead of being read from disk.
// NOTE(review): if the OCR manager writes its pre-processed images to another
// directory, add that directory to `allowedRoots`.
app.get('/api/ocr/processed-image', async (req, res) => {
  try {
    const imagePath = req.query.path;

    if (typeof imagePath !== 'string' || imagePath === '') {
      return res.status(400).json({ error: '图片路径是必需的' });
    }

    // Clients may send a doubly-encoded value. A literal '%' makes
    // decodeURIComponent throw, in which case the raw value is used unchanged.
    let decodedPath = imagePath;
    try {
      decodedPath = decodeURIComponent(imagePath);
    } catch {
      decodedPath = imagePath;
    }

    const resolvedPath = path.resolve(decodedPath);
    const allowedRoots = [processedDir, tempDir, uploadDir];
    const isAllowed = allowedRoots.some((root) => {
      const relative = path.relative(root, resolvedPath);
      // Inside `root` means: not the root itself, not climbing out via '..',
      // and not on a different drive (absolute result on Windows).
      return (
        relative !== '' &&
        relative !== '..' &&
        !relative.startsWith('..' + path.sep) &&
        !path.isAbsolute(relative)
      );
    });

    if (!isAllowed) {
      return res.status(403).json({ error: '无权访问该路径' });
    }

    if (!fse.existsSync(resolvedPath)) {
      return res.status(404).json({ error: '预处理图片不存在' });
    }

    // Replace the JSON content type set by the global middleware.
    res.setHeader('Content-Type', 'image/png');
    res.sendFile(resolvedPath);
  } catch (error) {
    console.error('获取预处理图片失败:', error);
    res.status(500).json({ error: '获取预处理图片失败' });
  }
});
|
|
|
|
// Debug endpoint: logs which text box of which file is being inspected.
// Body: `fileId` and `boxIndex`, both required (400 otherwise); 404 for an unknown file.
app.post('/api/ocr/debug-recognition', async (req, res) => {
  try {
    const { fileId, boxIndex } = req.body;

    // `boxIndex` may legitimately be 0, so only `undefined` counts as missing.
    if (!fileId || boxIndex === undefined) {
      return res.status(400).json({ error: '文件ID和框索引是必需的' });
    }

    const file = await fileService.getFileById(parseInt(fileId, 10));
    if (!file) {
      return res.status(404).json({ error: '文件未找到' });
    }

    // Placeholder for more detailed debugging logic.
    console.log(`🔧 调试文件 ${fileId} 的第 ${boxIndex} 个文本框`);

    res.json({
      success: true,
      message: '调试信息已输出到控制台'
    });
  } catch (error) {
    console.error('调试失败:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: '调试失败: ' + message });
  }
});
|
|
// OCR status endpoint: returns whatever the ONNX OCR manager reports as its status.
app.get('/api/ocr/status', async (req, res) => {
  try {
    const status = onnxOcrManager.getStatus();
    res.json({
      success: true,
      data: status
    });
  } catch (error) {
    console.error('获取ONNX OCR状态失败:', error);
    // The thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: '获取状态失败: ' + message });
  }
});
|
|
// Thumbnail endpoint: returns a JPEG thumbnail (at most 100x100, never enlarged)
// for image files. If thumbnail generation fails, the original file is served instead.
// Responds 404 for an unknown file, 400 for non-image files, 500 on lookup failure.
app.get('/api/files/:id/thumbnail', async (req, res) => {
  try {
    // The lookup sits inside the try block so a database failure becomes a 500
    // response instead of an unhandled promise rejection.
    const fileId = parseInt(req.params.id, 10);
    const file = await fileService.getFileById(fileId);

    if (!file) {
      return res.status(404).json({ error: 'File not found' });
    }

    // Thumbnails are only generated for images.
    if (typeof file.mimeType !== 'string' || !file.mimeType.startsWith('image/')) {
      return res.status(400).json({ error: 'Not an image file' });
    }

    const thumbnailPath = path.join(tempDir, `thumbnail-${fileId}.jpg`);

    try {
      await sharp(file.filePath)
        .resize(100, 100, {
          fit: 'inside',
          withoutEnlargement: true
        })
        .jpeg({ quality: 80 })
        .toFile(thumbnailPath);
    } catch (error) {
      console.error('Thumbnail generation error:', error);
      // Fall back to the original image, labelled with its own MIME type.
      res.setHeader('Content-Type', file.mimeType);
      res.sendFile(path.resolve(file.filePath));
      return;
    }

    // The global middleware has already set a JSON content type, so it is
    // replaced explicitly before the image bytes are sent.
    res.setHeader('Content-Type', 'image/jpeg');
    res.sendFile(path.resolve(thumbnailPath));
  } catch (error) {
    console.error('Thumbnail generation error:', error);
    res.status(500).json({ error: 'Failed to get thumbnail' });
  }
});
|
|
|
|
// Health-check endpoint: reports a static OK status plus the current time.
app.get('/api/health', (_req, res) => {
  const payload = {
    status: 'OK',
    timestamp: new Date().toISOString(),
    service: 'file-management-api'
  };
  res.json(payload);
});
|
|
|
|
|
|
/**
 * Initialises the ONNX OCR engine at server start-up.
 * A failure is logged and not rethrown, so the returned promise never rejects.
 */
async function initializeOcrEngine() {
  console.log('正在初始化ONNX OCR引擎...');

  try {
    await onnxOcrManager.initialize();
  } catch (err) {
    console.error('ONNX OCR引擎初始化失败:', err);
    return;
  }

  console.log('ONNX OCR引擎初始化完成');
}
|
|
|
|
/**
 * Starts the HTTP server on `PORT` and kicks off OCR engine initialisation
 * without waiting for it to finish.
 */
function startServer() {
  // Fire-and-forget: initializeOcrEngine handles its own errors, so `void`
  // marks the promise as deliberately not awaited.
  void initializeOcrEngine();

  app.listen(PORT, () => {
    console.log(`Server running on http://localhost:${PORT}`);
  });
}

export { startServer };