| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487 |
- import express from 'express';
- import cors from 'cors';
- import multer from 'multer';
- import path from 'path';
- import fs from 'fs-extra';
- import { calculateFileMD5 } from './utils.js';
- import { initDatabase, FileService } from '../database/database.js';
- import onnxOcrManager from "./utils/onnxOcrManager.js";
- import sharp from "sharp";
- import fse from "fs-extra";
// Express application instance and the fixed port it listens on.
const app = express();
const PORT = 3000;

// Initialise the database, then create the service used by the routes below.
initDatabase();
const fileService = new FileService();

// Working directories, all located under the current working directory.
const uploadDir = path.join(process.cwd(), 'uploads');
const tempDir = path.join(process.cwd(), 'temp');
const processedDir = path.join(process.cwd(), 'processed');

// Make sure every working directory exists before any request is served.
for (const requiredDir of [uploadDir, tempDir, processedDir]) {
  fs.ensureDirSync(requiredDir);
}
// Multer disk storage: uploads are written to uploadDir under a sanitised
// name with a timestamp/random suffix.
const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    cb(null, uploadDir);
  },
  filename: (req, file, cb) => {
    // The upload fileFilter runs before multer asks for a filename and has
    // already converted file.originalname from latin1 to UTF-8. Decoding it a
    // second time truncates every character above U+00FF to its low byte
    // (which destroys Chinese names), so the name is used as-is here.
    const originalName = file.originalname;
    const ext = path.extname(originalName);
    const name = path.basename(originalName, ext);
    // Keep ASCII letters, digits and characters in U+4E00..U+9FA5; replace
    // everything else with "_".
    const safeName = name.replace(/[^a-zA-Z0-9\u4e00-\u9fa5]/g, '_');
    // Timestamp plus a random number keeps repeated uploads from colliding.
    const uniqueSuffix = `${Date.now()}-${Math.round(Math.random() * 1e9)}`;
    cb(null, `${safeName}-${uniqueSuffix}${ext}`);
  },
});
// Multer instance used by the upload route; it accepts every file.
const upload = multer({
  storage,
  fileFilter(req, file, cb) {
    // Re-read the name's characters as latin1 bytes and decode those bytes as
    // UTF-8, storing the result back on the file object.
    const rawNameBytes = Buffer.from(file.originalname, 'latin1');
    file.originalname = rawNameBytes.toString('utf8');
    cb(null, true);
  },
});
// Global middleware, registered in this order:
//   1. preset a JSON Content-Type with an explicit UTF-8 charset on every response,
//   2. enable CORS,
//   3. parse JSON and URL-encoded bodies up to 50mb.
app
  .use((req, res, next) => {
    res.setHeader('Content-Type', 'application/json; charset=utf-8');
    next();
  })
  .use(cors())
  .use(express.json({ limit: '50mb' }))
  .use(express.urlencoded({ extended: true, limit: '50mb' }));
// File upload endpoint: accepts one multipart file in the "file" field,
// computes its MD5 and stores a record through fileService.
// Responds 400 when no file was sent and 500 when hashing or saving fails.
app.post('/api/upload', upload.single('file'), async (req, res) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    // originalname has already been converted from latin1 to UTF-8 by the
    // upload fileFilter. Decoding it again would corrupt non-ASCII names, so
    // it is stored unchanged.
    const { originalname, filename, path: filePath, size, mimetype } = req.file;

    const md5 = await calculateFileMD5(filePath);

    const fileRecord = await fileService.createFile({
      originalName: originalname,
      fileName: filename,
      filePath,
      fileSize: size,
      mimeType: mimetype,
      md5,
    });

    res.json({
      success: true,
      data: fileRecord,
    });
  } catch (error) {
    console.error('Upload error:', error);
    res.status(500).json({ error: 'Upload failed: ' + error.message });
  }
});
// File list endpoint: returns one page of files plus pagination metadata.
// Query parameters: page (default 1) and pageSize (default 100).
app.get('/api/files', async (req, res) => {
  try {
    // Parse with an explicit radix. Anything that is not a positive integer
    // (missing, non-numeric, zero or negative) falls back to the default so a
    // negative page or page size never reaches the pagination query.
    const requestedPage = Number.parseInt(String(req.query.page), 10);
    const requestedPageSize = Number.parseInt(String(req.query.pageSize), 10);
    const page = requestedPage > 0 ? requestedPage : 1;
    const pageSize = requestedPageSize > 0 ? requestedPageSize : 100;

    const result = await fileService.getFilesPaginated(page, pageSize);

    // Uniform response shape: the file array goes directly in `data`.
    res.json({
      success: true,
      data: result.files,
      pagination: result.pagination,
    });
  } catch (error) {
    console.error('Get files error:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get files: ' + error.message,
    });
  }
});
// MD5 check endpoint: re-hashes the stored file and reports whether the hash
// differs from the one saved in the file record.
// Responds 404 when the record does not exist and 500 on any other failure.
app.post('/api/files/:id/check-md5', async (req, res) => {
  try {
    // Explicit radix so the id is always read as decimal.
    const fileId = Number.parseInt(req.params.id, 10);
    const file = await fileService.getFileById(fileId);
    if (!file) {
      return res.status(404).json({ error: 'File not found' });
    }

    const currentMD5 = await calculateFileMD5(file.filePath);

    res.json({
      isChanged: currentMD5 !== file.md5,
      currentMD5,
      originalMD5: file.md5,
      file,
    });
  } catch (error) {
    console.error('MD5 check error:', error);
    res.status(500).json({ error: 'MD5 check failed' });
  }
});
// MD5 update endpoint: overwrites the stored MD5 of a file record with the
// value sent in the request body.
// Responds 400 when the body carries no usable md5 and 500 when the update fails.
app.put('/api/files/:id/update-md5', async (req, res) => {
  try {
    // Explicit radix so the id is always read as decimal.
    const fileId = Number.parseInt(req.params.id, 10);
    const { md5 } = req.body;

    // Reject a missing or non-string hash instead of writing it to the record,
    // matching the required-field checks the OCR routes perform.
    if (typeof md5 !== 'string' || md5 === '') {
      return res.status(400).json({ error: 'MD5 is required' });
    }

    await fileService.updateFileMD5(fileId, md5);
    res.json({ success: true });
  } catch (error) {
    console.error('Update MD5 error:', error);
    res.status(500).json({ error: 'Update failed' });
  }
});
// OCR recognition endpoint: runs the ONNX OCR manager on one stored file.
// Body: { fileId, config }. Responds 400 without a fileId, 404 when the file
// record is missing and 500 when recognition throws.
app.post('/api/ocr/recognize', async (req, res) => {
  try {
    const { fileId, config } = req.body;
    if (!fileId) {
      return res.status(400).json({ error: 'File ID is required' });
    }

    // Explicit radix so the id is always read as decimal.
    const file = await fileService.getFileById(Number.parseInt(fileId, 10));
    if (!file) {
      return res.status(404).json({ error: 'File not found' });
    }

    console.log(`开始ONNX OCR识别: ${file.originalName}`);

    const result = await onnxOcrManager.recognizeImage(file.filePath, config);
    const { textBlocks, totalPages, processingTime, confidence, imageInfo, isOffline } = result;

    res.json({
      success: true,
      data: {
        textBlocks,
        totalPages,
        processingTime,
        confidence,
        // The ONNX path does not provide a processed image for now.
        processedImageUrl: '',
        imageInfo,
        isOffline,
      },
    });
  } catch (error) {
    console.error('ONNX OCR识别失败:', error);
    res.status(500).json({ error: 'OCR识别失败: ' + error.message });
  }
});
/**
 * Writes a grayscale, normalised, sharpened PNG copy of an image into
 * processedDir as `processed-<fileId>.png`.
 *
 * @param fileId - Id used to build the target file name and the returned URL.
 * @param processedImagePath - Path of the image handed to sharp.
 * @returns The API URL of the processed image, or an empty string when the
 *          conversion fails (the error is logged, not rethrown).
 */
async function saveProcessedImage(fileId: number, processedImagePath: string): Promise<string> {
  try {
    const outputFile = path.join(processedDir, `processed-${fileId}.png`);
    const pipeline = sharp(processedImagePath).grayscale().normalize().sharpen().png();
    await pipeline.toFile(outputFile);
    return `/api/files/${fileId}/processed-image`;
  } catch (error) {
    console.error('保存处理后的图片失败:', error);
    return '';
  }
}
// Processed image endpoint: serves `processed-<id>.png` from processedDir.
// Responds 404 when that file does not exist and 500 on unexpected errors.
app.get('/api/files/:id/processed-image', async (req, res) => {
  try {
    // Explicit radix so the id is always read as decimal.
    const fileId = Number.parseInt(req.params.id, 10);
    const processedImagePath = path.join(processedDir, `processed-${fileId}.png`);

    if (!fs.existsSync(processedImagePath)) {
      return res.status(404).json({ error: 'Processed image not found' });
    }

    // Replace the JSON Content-Type preset by the global middleware.
    res.setHeader('Content-Type', 'image/png');
    res.sendFile(path.resolve(processedImagePath));
  } catch (error) {
    console.error('Get processed image error:', error);
    res.status(500).json({ error: 'Failed to get processed image' });
  }
});
// Save OCR result endpoint. Body: { fileId, ocrData }.
// Responds 400 when either field is missing and 500 when saving fails.
app.post('/api/ocr/save-result', async (req, res) => {
  try {
    const { fileId, ocrData } = req.body;
    if (!fileId || !ocrData) {
      return res.status(400).json({ error: '文件ID和OCR数据是必需的' });
    }

    // Explicit radix so the id is always read as decimal.
    await fileService.saveOcrResult(Number.parseInt(fileId, 10), ocrData);
    res.json({ success: true });
  } catch (error) {
    console.error('保存OCR结果失败:', error);
    res.status(500).json({ error: '保存OCR结果失败: ' + error.message });
  }
});
// Get OCR result endpoint: returns the stored OCR data of a file.
// A missing result is reported with success: false (still HTTP 200);
// lookup failures respond 500.
app.get('/api/ocr/result/:fileId', async (req, res) => {
  try {
    // Explicit radix so the id is always read as decimal.
    const fileId = Number.parseInt(req.params.fileId, 10);
    const result = await fileService.getOcrResult(fileId);

    if (!result) {
      res.json({
        success: false,
        error: '未找到OCR结果',
      });
      return;
    }

    res.json({
      success: true,
      data: result.ocr_data,
    });
  } catch (error) {
    console.error('获取OCR结果失败:', error);
    res.status(500).json({ error: '获取OCR结果失败: ' + error.message });
  }
});
// Update OCR text endpoint (manual correction). Body: { fileId, textBlocks }.
// Responds 400 when either field is missing and 500 when the update fails.
app.put('/api/ocr/update-text', async (req, res) => {
  try {
    const { fileId, textBlocks } = req.body;
    if (!fileId || !textBlocks) {
      return res.status(400).json({ error: '文件ID和文本数据是必需的' });
    }

    // Explicit radix so the id is always read as decimal.
    await fileService.updateOcrText(Number.parseInt(fileId, 10), textBlocks);
    res.json({ success: true });
  } catch (error) {
    console.error('更新OCR文本失败:', error);
    res.status(500).json({ error: '更新OCR文本失败: ' + error.message });
  }
});
// File preview endpoint: streams the stored file with its recorded MIME type.
// Responds 404 when the record or the file on disk is missing, 500 otherwise.
app.get('/api/files/:id/preview', async (req, res) => {
  try {
    // Explicit radix so the id is always read as decimal.
    const fileId = Number.parseInt(req.params.id, 10);
    const file = await fileService.getFileById(fileId);
    if (!file) {
      return res.status(404).json({ error: 'File not found' });
    }

    // The record can outlive the file, so check the disk as well.
    if (!fs.existsSync(file.filePath)) {
      return res.status(404).json({ error: 'File not found on disk' });
    }

    // Replace the JSON Content-Type preset by the global middleware.
    res.setHeader('Content-Type', file.mimeType);
    res.sendFile(path.resolve(file.filePath));
  } catch (error) {
    console.error('File preview error:', error);
    res.status(500).json({ error: 'Failed to get file preview' });
  }
});
// Batch OCR endpoint. Body: { fileIds: [...], config }.
// Ids without a matching file record are skipped; the paths of the remaining
// files are passed to the ONNX OCR manager in request order.
// Responds 400 when fileIds is not an array and 500 when recognition throws.
app.post('/api/ocr/batch-recognize', async (req, res) => {
  try {
    const { fileIds, config } = req.body;
    if (!Array.isArray(fileIds)) {
      return res.status(400).json({ error: 'File IDs array is required' });
    }

    const filePaths = [];
    for (const rawId of fileIds) {
      // Explicit radix so each id is always read as decimal.
      const file = await fileService.getFileById(Number.parseInt(rawId, 10));
      if (file) {
        filePaths.push(file.filePath);
      }
    }

    const results = await onnxOcrManager.batchRecognize(filePaths, config);

    res.json({
      success: true,
      data: results,
    });
  } catch (error) {
    console.error('批量ONNX OCR识别失败:', error);
    res.status(500).json({ error: '批量识别失败: ' + error.message });
  }
});
// Preprocessed image endpoint: serves the PNG at the path given in the
// `path` query parameter.
// The path comes from the client, so it is only served when it resolves to a
// file inside one of the directories this server manages; otherwise any file
// readable by the process could be downloaded.
// Responds 400 without a path, 403 for a path outside the managed
// directories, 404 when the file does not exist and 500 on other errors.
app.get('/api/ocr/processed-image', async (req, res) => {
  try {
    const imagePath = req.query.path;
    // A repeated query key arrives as an array; only a single string is accepted.
    if (typeof imagePath !== 'string' || imagePath === '') {
      return res.status(400).json({ error: '图片路径是必需的' });
    }

    // Decode first, then resolve, so the containment check sees the final path.
    const resolvedPath = path.resolve(decodeURIComponent(imagePath));

    // NOTE(review): assumes preprocessed images are written under uploads/,
    // temp/ or processed/ - extend this list if the OCR manager writes elsewhere.
    const isInsideManagedDir = [uploadDir, tempDir, processedDir].some((rootDir) => {
      const relativePath = path.relative(rootDir, resolvedPath);
      return (
        relativePath !== '' &&
        relativePath !== '..' &&
        !relativePath.startsWith(`..${path.sep}`) &&
        !path.isAbsolute(relativePath)
      );
    });
    if (!isInsideManagedDir) {
      return res.status(403).json({ error: 'Access to this path is not allowed' });
    }

    if (!fse.existsSync(resolvedPath)) {
      return res.status(404).json({ error: '预处理图片不存在' });
    }

    // Replace the JSON Content-Type preset by the global middleware.
    res.setHeader('Content-Type', 'image/png');
    res.sendFile(resolvedPath);
  } catch (error) {
    console.error('获取预处理图片失败:', error);
    res.status(500).json({ error: '获取预处理图片失败' });
  }
});
// Debug endpoint: checks that the file exists and logs which text box was
// requested. Body: { fileId, boxIndex }.
// Responds 400 when a field is missing, 404 when the file record is missing
// and 500 on other errors.
app.post('/api/ocr/debug-recognition', async (req, res) => {
  try {
    const { fileId, boxIndex } = req.body;
    // boxIndex is compared with undefined so that index 0 is accepted.
    if (!fileId || boxIndex === undefined) {
      return res.status(400).json({ error: '文件ID和框索引是必需的' });
    }

    // Explicit radix so the id is always read as decimal.
    const file = await fileService.getFileById(Number.parseInt(fileId, 10));
    if (!file) {
      return res.status(404).json({ error: '文件未找到' });
    }

    // Placeholder: more specific debugging logic can be added here.
    console.log(`🔧 调试文件 ${fileId} 的第 ${boxIndex} 个文本框`);

    res.json({
      success: true,
      message: '调试信息已输出到控制台',
    });
  } catch (error) {
    console.error('调试失败:', error);
    res.status(500).json({ error: '调试失败: ' + error.message });
  }
});
// OCR status endpoint: returns whatever the ONNX OCR manager reports as its
// status; responds 500 if reading the status throws.
app.get('/api/ocr/status', async (req, res) => {
  try {
    res.json({
      success: true,
      data: onnxOcrManager.getStatus(),
    });
  } catch (error) {
    console.error('获取ONNX OCR状态失败:', error);
    res.status(500).json({ error: '获取状态失败: ' + error.message });
  }
});
// Thumbnail endpoint: generates a JPEG thumbnail (at most 100x100, never
// enlarged) in tempDir and sends it. If generation fails, the original file
// is sent instead.
// Responds 404 when the record is missing, 400 for non-image files and 500
// when the file record could not be loaded at all.
app.get('/api/files/:id/thumbnail', async (req, res) => {
  let file;
  try {
    // Explicit radix so the id is always read as decimal.
    const fileId = Number.parseInt(req.params.id, 10);
    // The lookup sits inside the try so a failing lookup produces a response
    // instead of an unhandled promise rejection.
    file = await fileService.getFileById(fileId);
    if (!file) {
      return res.status(404).json({ error: 'File not found' });
    }

    // Thumbnails are generated for images only.
    if (!file.mimeType.startsWith('image/')) {
      return res.status(400).json({ error: 'Not an image file' });
    }

    const thumbnailPath = path.join(tempDir, `thumbnail-${fileId}.jpg`);
    await sharp(file.filePath)
      .resize(100, 100, {
        fit: 'inside',
        withoutEnlargement: true,
      })
      .jpeg({ quality: 80 })
      .toFile(thumbnailPath);

    // The global middleware presets a JSON Content-Type and sendFile keeps a
    // type that is already set, so the image type is stated explicitly.
    res.setHeader('Content-Type', 'image/jpeg');
    res.sendFile(path.resolve(thumbnailPath));
  } catch (error) {
    console.error('Thumbnail generation error:', error);
    if (!file) {
      // The record itself could not be loaded; there is no original to fall back to.
      return res.status(500).json({ error: 'Failed to get thumbnail' });
    }
    // Fall back to the original file, labelled with its own MIME type when known.
    if (file.mimeType) {
      res.setHeader('Content-Type', file.mimeType);
    }
    res.sendFile(path.resolve(file.filePath));
  }
});
// Health check endpoint: reports a static OK status, the service name and
// the current time in ISO format.
app.get('/api/health', (req, res) => {
  const now = new Date();
  const payload = {
    status: 'OK',
    timestamp: now.toISOString(),
    service: 'file-management-api',
  };
  res.json(payload);
});
/**
 * Initialises the ONNX OCR manager and logs progress.
 * A failure is logged and swallowed, so the returned promise never rejects.
 */
async function initializeOcrEngine() {
  console.log('正在初始化ONNX OCR引擎...');
  try {
    await onnxOcrManager.initialize();
  } catch (error) {
    console.error('ONNX OCR引擎初始化失败:', error);
    return;
  }
  console.log('ONNX OCR引擎初始化完成');
}
/**
 * Starts the OCR engine initialisation without waiting for it, then starts
 * the HTTP server on PORT.
 */
function startServer() {
  // Not awaited on purpose: initializeOcrEngine handles its own errors.
  void initializeOcrEngine();

  const announceListening = () => {
    console.log(`Server running on http://localhost:${PORT}`);
  };
  app.listen(PORT, announceListening);
}

export { startServer };
|