import express from 'express';
import cors from 'cors';
import multer from 'multer';
import path from 'path';
import fs from 'fs-extra';
import { calculateFileMD5 } from './utils.js';
import { initDatabase, FileService } from '../database/database.js';
import onnxOcrManager from "./utils/onnxOcrManager.js";
import sharp from "sharp";
import fse from "fs-extra";

const app = express();
const PORT = 3000;

// Initialise the database.
initDatabase();
const fileService = new FileService();

// Make sure the upload, temp and processed directories exist.
const uploadDir = path.join(process.cwd(), 'uploads');
const tempDir = path.join(process.cwd(), 'temp');
const processedDir = path.join(process.cwd(), 'processed');
fs.ensureDirSync(uploadDir);
fs.ensureDirSync(tempDir);
fs.ensureDirSync(processedDir);

/**
 * Directories from which the query-path image endpoint is allowed to serve files.
 * NOTE(review): assumes the OCR manager writes its preprocessed images into one
 * of these directories — extend this list if it writes elsewhere.
 */
const servableRoots: readonly string[] = [uploadDir, tempDir, processedDir];

/** Extracts a printable message from a caught value of unknown type. */
function getErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Parses a file ID coming from a route parameter or a request body.
 *
 * @param value Raw value (string or number).
 * @returns The integer ID, or `null` when the value is not an integer.
 */
function parseId(value: unknown): number | null {
  const parsed = typeof value === 'number' ? value : Number.parseInt(String(value), 10);
  return Number.isSafeInteger(parsed) ? parsed : null;
}

/**
 * Parses a positive integer query parameter.
 *
 * @param value Raw query value.
 * @param fallback Value returned when `value` is missing, non-numeric or not positive.
 */
function parsePositiveInt(value: unknown, fallback: number): number {
  const parsed = typeof value === 'string' ? Number.parseInt(value, 10) : Number.NaN;
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/** Returns true when `candidate` is located strictly inside the directory `root`. */
function isPathInside(candidate: string, root: string): boolean {
  const relative = path.relative(root, candidate);
  if (relative === '' || path.isAbsolute(relative)) {
    return false;
  }
  return relative !== '..' && !relative.startsWith('..' + path.sep);
}

// Multer configuration, including handling of non-ASCII (e.g. Chinese) file names.
const storage = multer.diskStorage({
  destination: (_req, _file, cb) => {
    cb(null, uploadDir);
  },
  filename: (_req, file, cb) => {
    // `file.originalname` has already been converted to UTF-8 by `fileFilter`
    // below, which multer invokes before the storage engine. Converting it a
    // second time would corrupt every non-ASCII character.
    const originalName = file.originalname;
    const ext = path.extname(originalName);
    const name = path.basename(originalName, ext);
    // Sanitise the name: keep ASCII letters, digits and CJK ideographs only.
    const safeName = name.replace(/[^a-zA-Z0-9\u4e00-\u9fa5]/g, '_');
    const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() * 1E9);
    const filename = safeName + '-' + uniqueSuffix + ext;
    cb(null, filename);
  }
});

const upload = multer({
  storage,
  fileFilter: (_req, file, cb) => {
    // Re-interpret the latin1-decoded name as UTF-8. This is the single place
    // where the conversion happens; later stages read the corrected value.
    file.originalname = Buffer.from(file.originalname, 'latin1').toString('utf8');
    cb(null, true);
  }
});

// Default response header so JSON responses are sent as UTF-8.
// Routes that send files must set their own Content-Type, because
// `res.sendFile` does not replace a Content-Type that is already set.
app.use((req, res, next) => {
  res.setHeader('Content-Type', 'application/json; charset=utf-8');
  next();
});

app.use(cors());
app.use(express.json({ limit: '50mb' }));
app.use(express.urlencoded({ extended: true, limit: '50mb' }));

// File upload endpoint.
app.post('/api/upload', upload.single('file'), async (req, res) => {
  // Tracks whether the database row exists, so a later failure does not
  // delete a file that is already referenced.
  let recordSaved = false;
  try {
    if (!req.file) {
      res.status(400).json({ error: 'No file uploaded' });
      return;
    }

    const fileInfo = {
      // Already UTF-8: decoded once in the multer `fileFilter`.
      originalName: req.file.originalname,
      fileName: req.file.filename,
      filePath: req.file.path,
      fileSize: req.file.size,
      mimeType: req.file.mimetype
    };

    // Compute the MD5 checksum of the stored file.
    const md5 = await calculateFileMD5(req.file.path);

    // Persist the record.
    const fileRecord = await fileService.createFile({ ...fileInfo, md5 });
    recordSaved = true;

    res.json({ success: true, data: fileRecord });
  } catch (error) {
    console.error('Upload error:', error);
    if (req.file && !recordSaved) {
      // Best-effort cleanup so a failed upload does not leave an orphan on disk.
      await fs.remove(req.file.path).catch(() => undefined);
    }
    res.status(500).json({ error: 'Upload failed: ' + getErrorMessage(error) });
  }
});

// File list endpoint (paginated).
app.get('/api/files', async (req, res) => {
  try {
    const page = parsePositiveInt(req.query.page, 1);
    const pageSize = parsePositiveInt(req.query.pageSize, 100);

    const result = await fileService.getFilesPaginated(page, pageSize);

    // Uniform response shape: the file array directly under `data`.
    res.json({
      success: true,
      data: result.files,
      pagination: result.pagination
    });
  } catch (error) {
    console.error('Get files error:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get files: ' + getErrorMessage(error)
    });
  }
});

// MD5 check endpoint: compares the stored checksum with the file's current one.
app.post('/api/files/:id/check-md5', async (req, res) => {
  try {
    const fileId = parseId(req.params.id);
    if (fileId === null) {
      res.status(400).json({ error: 'Invalid file ID' });
      return;
    }

    const file = await fileService.getFileById(fileId);
    if (!file) {
      res.status(404).json({ error: 'File not found' });
      return;
    }

    const currentMD5 = await calculateFileMD5(file.filePath);
    const isChanged = currentMD5 !== file.md5;

    res.json({ isChanged, currentMD5, originalMD5: file.md5, file });
  } catch (error) {
    console.error('MD5 check error:', error);
    res.status(500).json({ error: 'MD5 check failed' });
  }
});

// MD5 update endpoint.
app.put('/api/files/:id/update-md5', async (req, res) => {
  try {
    const fileId = parseId(req.params.id);
    if (fileId === null) {
      res.status(400).json({ error: 'Invalid file ID' });
      return;
    }

    const { md5 } = req.body;
    if (typeof md5 !== 'string' || md5.length === 0) {
      res.status(400).json({ error: 'MD5 is required' });
      return;
    }

    await fileService.updateFileMD5(fileId, md5);
    res.json({ success: true });
  } catch (error) {
    console.error('Update MD5 error:', error);
    res.status(500).json({ error: 'Update failed' });
  }
});

// OCR recognition endpoint, backed by the ONNX OCR manager.
app.post('/api/ocr/recognize', async (req, res) => {
  try {
    const { fileId, config } = req.body;

    if (!fileId) {
      res.status(400).json({ error: 'File ID is required' });
      return;
    }

    const parsedId = parseId(fileId);
    if (parsedId === null) {
      res.status(400).json({ error: 'Invalid file ID' });
      return;
    }

    const file = await fileService.getFileById(parsedId);
    if (!file) {
      res.status(404).json({ error: 'File not found' });
      return;
    }

    console.log(`开始ONNX OCR识别: ${file.originalName}`);

    // Run recognition through the ONNX OCR manager.
    const result = await onnxOcrManager.recognizeImage(file.filePath, config);

    res.json({
      success: true,
      data: {
        textBlocks: result.textBlocks,
        totalPages: result.totalPages,
        processingTime: result.processingTime,
        confidence: result.confidence,
        processedImageUrl: '', // The ONNX path does not provide a processed image yet.
        imageInfo: result.imageInfo,
        isOffline: result.isOffline
      }
    });
  } catch (error) {
    console.error('ONNX OCR识别失败:', error);
    res.status(500).json({ error: 'OCR识别失败: ' + getErrorMessage(error) });
  }
});

/**
 * Writes a grayscale, normalised, sharpened PNG copy of an image into the
 * processed directory.
 *
 * @param fileId ID used to name the output file (`processed-<fileId>.png`).
 * @param processedImagePath Path of the source image.
 * @returns The API URL of the processed image, or an empty string on failure.
 */
async function saveProcessedImage(fileId: number, processedImagePath: string): Promise<string> {
  try {
    const targetPath = path.join(processedDir, `processed-${fileId}.png`);

    // Process and save the image with sharp.
    await sharp(processedImagePath)
      .grayscale()
      .normalize()
      .sharpen()
      .png()
      .toFile(targetPath);

    return `/api/files/${fileId}/processed-image`;
  } catch (error) {
    console.error('保存处理后的图片失败:', error);
    return '';
  }
}

// Serves the processed image of a file.
app.get('/api/files/:id/processed-image', async (req, res) => {
  try {
    const fileId = parseId(req.params.id);
    if (fileId === null) {
      res.status(400).json({ error: 'Invalid file ID' });
      return;
    }

    const processedImagePath = path.join(processedDir, `processed-${fileId}.png`);

    if (!fs.existsSync(processedImagePath)) {
      res.status(404).json({ error: 'Processed image not found' });
      return;
    }

    res.setHeader('Content-Type', 'image/png');
    res.sendFile(path.resolve(processedImagePath));
  } catch (error) {
    console.error('Get processed image error:', error);
    res.status(500).json({ error: 'Failed to get processed image' });
  }
});

// Saves an OCR result.
app.post('/api/ocr/save-result', async (req, res) => {
  try {
    const { fileId, ocrData } = req.body;

    if (!fileId || !ocrData) {
      res.status(400).json({ error: '文件ID和OCR数据是必需的' });
      return;
    }

    const parsedId = parseId(fileId);
    if (parsedId === null) {
      res.status(400).json({ error: '无效的文件ID' });
      return;
    }

    await fileService.saveOcrResult(parsedId, ocrData);
    res.json({ success: true });
  } catch (error) {
    console.error('保存OCR结果失败:', error);
    res.status(500).json({ error: '保存OCR结果失败: ' + getErrorMessage(error) });
  }
});

// Returns the stored OCR result of a file.
app.get('/api/ocr/result/:fileId', async (req, res) => {
  try {
    const fileId = parseId(req.params.fileId);
    if (fileId === null) {
      res.status(400).json({ error: '无效的文件ID' });
      return;
    }

    const result = await fileService.getOcrResult(fileId);

    if (result) {
      res.json({ success: true, data: result.ocr_data });
    } else {
      res.json({ success: false, error: '未找到OCR结果' });
    }
  } catch (error) {
    console.error('获取OCR结果失败:', error);
    res.status(500).json({ error: '获取OCR结果失败: ' + getErrorMessage(error) });
  }
});

// Updates OCR text (manual correction).
app.put('/api/ocr/update-text', async (req, res) => {
  try {
    const { fileId, textBlocks } = req.body;

    if (!fileId || !textBlocks) {
      res.status(400).json({ error: '文件ID和文本数据是必需的' });
      return;
    }

    const parsedId = parseId(fileId);
    if (parsedId === null) {
      res.status(400).json({ error: '无效的文件ID' });
      return;
    }

    await fileService.updateOcrText(parsedId, textBlocks);
    res.json({ success: true });
  } catch (error) {
    console.error('更新OCR文本失败:', error);
    res.status(500).json({ error: '更新OCR文本失败: ' + getErrorMessage(error) });
  }
});

// File preview endpoint: streams the stored file with its recorded MIME type.
app.get('/api/files/:id/preview', async (req, res) => {
  try {
    const fileId = parseId(req.params.id);
    if (fileId === null) {
      res.status(400).json({ error: 'Invalid file ID' });
      return;
    }

    const file = await fileService.getFileById(fileId);
    if (!file) {
      res.status(404).json({ error: 'File not found' });
      return;
    }

    // Make sure the file still exists on disk.
    if (!fs.existsSync(file.filePath)) {
      res.status(404).json({ error: 'File not found on disk' });
      return;
    }

    // Override the default JSON Content-Type with the file's own type.
    res.setHeader('Content-Type', file.mimeType);

    // Send the file as-is.
    res.sendFile(path.resolve(file.filePath));
  } catch (error) {
    console.error('File preview error:', error);
    res.status(500).json({ error: 'Failed to get file preview' });
  }
});

// Serves a preprocessed image identified by a path in the query string.
app.get('/api/ocr/processed-image', async (req, res) => {
  try {
    const imagePath = req.query.path;

    if (typeof imagePath !== 'string' || imagePath.length === 0) {
      res.status(400).json({ error: '图片路径是必需的' });
      return;
    }

    // Express has already URL-decoded the query value. The extra decode is kept
    // for clients that double-encode, but a literal '%' must not crash the request.
    let decodedPath: string;
    try {
      decodedPath = decodeURIComponent(imagePath);
    } catch {
      decodedPath = imagePath;
    }

    // The path is caller-controlled: only serve files located inside the
    // known data directories so arbitrary files on the host cannot be read.
    const resolvedPath = path.resolve(decodedPath);
    if (!servableRoots.some((root) => isPathInside(resolvedPath, root))) {
      res.status(403).json({ error: '不允许访问该路径' });
      return;
    }

    if (!fse.existsSync(resolvedPath)) {
      res.status(404).json({ error: '预处理图片不存在' });
      return;
    }

    res.setHeader('Content-Type', 'image/png');
    res.sendFile(resolvedPath);
  } catch (error) {
    console.error('获取预处理图片失败:', error);
    res.status(500).json({ error: '获取预处理图片失败' });
  }
});

// Debug endpoint: logs which text box of which file is being inspected.
app.post('/api/ocr/debug-recognition', async (req, res) => {
  try {
    const { fileId, boxIndex } = req.body;

    if (!fileId || boxIndex === undefined) {
      res.status(400).json({ error: '文件ID和框索引是必需的' });
      return;
    }

    const parsedId = parseId(fileId);
    if (parsedId === null) {
      res.status(400).json({ error: '无效的文件ID' });
      return;
    }

    const file = await fileService.getFileById(parsedId);
    if (!file) {
      res.status(404).json({ error: '文件未找到' });
      return;
    }

    // Placeholder for more specific debugging logic.
    console.log(`🔧 调试文件 ${fileId} 的第 ${boxIndex} 个文本框`);

    res.json({ success: true, message: '调试信息已输出到控制台' });
  } catch (error) {
    console.error('调试失败:', error);
    res.status(500).json({ error: '调试失败: ' + getErrorMessage(error) });
  }
});

// OCR engine status endpoint.
app.get('/api/ocr/status', async (req, res) => {
  try {
    const status = onnxOcrManager.getStatus();
    res.json({ success: true, data: status });
  } catch (error) {
    console.error('获取ONNX OCR状态失败:', error);
    res.status(500).json({ error: '获取状态失败: ' + getErrorMessage(error) });
  }
});

// Thumbnail endpoint: returns a JPEG no larger than 100x100 for image files.
app.get('/api/files/:id/thumbnail', async (req, res) => {
  try {
    const fileId = parseId(req.params.id);
    if (fileId === null) {
      res.status(400).json({ error: 'Invalid file ID' });
      return;
    }

    // Looked up inside the try block so a rejected lookup yields a 500
    // response instead of an unhandled rejection.
    const file = await fileService.getFileById(fileId);
    if (!file) {
      res.status(404).json({ error: 'File not found' });
      return;
    }

    // Thumbnails are generated for images only.
    if (!file.mimeType.startsWith('image/')) {
      res.status(400).json({ error: 'Not an image file' });
      return;
    }

    const thumbnailPath = path.join(tempDir, `thumbnail-${fileId}.jpg`);

    try {
      // Generate the thumbnail.
      await sharp(file.filePath)
        .resize(100, 100, { fit: 'inside', withoutEnlargement: true })
        .jpeg({ quality: 80 })
        .toFile(thumbnailPath);
    } catch (error) {
      console.error('Thumbnail generation error:', error);
      // If thumbnail generation fails, fall back to the original image.
      res.setHeader('Content-Type', file.mimeType);
      res.sendFile(path.resolve(file.filePath));
      return;
    }

    // Explicit type: otherwise the default JSON Content-Type would be kept.
    res.setHeader('Content-Type', 'image/jpeg');
    res.sendFile(path.resolve(thumbnailPath));
  } catch (error) {
    console.error('Thumbnail error:', error);
    res.status(500).json({ error: 'Failed to get thumbnail' });
  }
});

// Health check endpoint.
app.get('/api/health', (req, res) => {
  res.json({
    status: 'OK',
    timestamp: new Date().toISOString(),
    service: 'file-management-api'
  });
});

/**
 * Initialises the ONNX OCR engine. Failures are logged and not rethrown,
 * so the HTTP server still starts when the engine cannot be initialised.
 */
async function initializeOcrEngine(): Promise<void> {
  try {
    console.log('正在初始化ONNX OCR引擎...');
    await onnxOcrManager.initialize();
    console.log('ONNX OCR引擎初始化完成');
  } catch (error) {
    console.error('ONNX OCR引擎初始化失败:', error);
  }
}

/**
 * Starts OCR engine initialisation in the background and begins listening
 * for HTTP requests.
 *
 * @param port TCP port to listen on; defaults to 3000.
 */
function startServer(port: number = PORT): void {
  // Fire-and-forget: initializeOcrEngine handles its own errors.
  void initializeOcrEngine();

  app.listen(port, () => {
    console.log(`Server running on http://localhost:${port}`);
  });
}

export { startServer };