server.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. import express from 'express';
  2. import cors from 'cors';
  3. import multer from 'multer';
  4. import path from 'path';
  5. import fs from 'fs-extra';
  6. import { calculateFileMD5 } from './utils.js';
  7. import { initDatabase, FileService } from '../database/database.js';
  8. import onnxOcrManager from "./utils/onnxOcrManager.js";
  9. import sharp from "sharp";
  10. import fse from "fs-extra";
  // Express application instance and the fixed port it listens on.
  const app = express();
  const PORT = 3000;

  // Initialise the database, then create the file service used by the routes.
  initDatabase();
  const fileService = new FileService();

  // Working directories, all located under the current working directory.
  const uploadDir = path.join(process.cwd(), 'uploads');
  const tempDir = path.join(process.cwd(), 'temp');
  const processedDir = path.join(process.cwd(), 'processed');

  // Create the upload, temp and processed directories up front (in that order).
  for (const requiredDir of [uploadDir, tempDir, processedDir]) {
    fs.ensureDirSync(requiredDir);
  }
  /**
   * Multer disk storage: writes uploads into `uploadDir` under a sanitised,
   * collision-resistant file name that keeps the original extension.
   */
  const storage = multer.diskStorage({
    destination: (req, file, cb) => {
      cb(null, uploadDir);
    },
    filename: (req, file, cb) => {
      // `file.originalname` has already been converted from latin1 to UTF-8 by
      // the `fileFilter` passed to multer in this file, which runs on this same
      // file object before the storage engine. Decoding a second time here
      // would corrupt non-ASCII (e.g. Chinese) names, so use the value as-is.
      const originalName = file.originalname;
      const ext = path.extname(originalName);
      const baseName = path.basename(originalName, ext);
      // Keep ASCII letters, digits and CJK ideographs; replace anything else.
      const safeName = baseName.replace(/[^a-zA-Z0-9\u4e00-\u9fa5]/g, '_');
      // Timestamp plus a random number keeps names unique across uploads.
      const uniqueSuffix = `${Date.now()}-${Math.round(Math.random() * 1e9)}`;
      cb(null, `${safeName}-${uniqueSuffix}${ext}`);
    },
  });
  /**
   * Multer instance used by the upload route. Every file is accepted; the
   * filter only re-interprets the reported original name as UTF-8.
   */
  const upload = multer({
    storage,
    fileFilter(req, file, cb) {
      // Re-read the latin1 string's bytes as UTF-8 and store the result back.
      const nameBytes = Buffer.from(file.originalname, 'latin1');
      file.originalname = nameBytes.toString('utf8');
      cb(null, true);
    },
  });
  // Default every response to UTF-8 JSON; handlers that send other content
  // set their own Content-Type afterwards.
  app.use(function setDefaultContentType(req, res, next) {
    res.setHeader('Content-Type', 'application/json; charset=utf-8');
    next();
  });

  // Cross-origin access, followed by JSON and URL-encoded body parsing
  // (both parsers accept bodies up to 50mb).
  app.use(cors());
  app.use(express.json({ limit: '50mb' }));
  app.use(express.urlencoded({ limit: '50mb', extended: true }));
  /**
   * POST /api/upload
   * Accepts a single multipart file in the `file` field, computes its MD5 and
   * stores a record through `fileService.createFile`.
   *
   * Responses:
   *   200 { success: true, data: <file record> }
   *   400 { error } when no file was uploaded
   *   500 { error } when hashing or persisting fails
   */
  app.post('/api/upload', upload.single('file'), async (req, res) => {
    try {
      if (!req.file) {
        return res.status(400).json({ error: 'No file uploaded' });
      }

      // `originalname` was already converted from latin1 to UTF-8 by the
      // multer `fileFilter` in this file. Decoding it again here would garble
      // non-ASCII names before they reach the database, so use it unchanged.
      const { originalname, filename, path: filePath, size, mimetype } = req.file;

      const md5 = await calculateFileMD5(filePath);

      const fileRecord = await fileService.createFile({
        originalName: originalname,
        fileName: filename,
        filePath,
        fileSize: size,
        mimeType: mimetype,
        md5,
      });

      res.json({
        success: true,
        data: fileRecord,
      });
    } catch (error) {
      console.error('Upload error:', error);
      // A thrown value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      res.status(500).json({ error: `Upload failed: ${reason}` });
    }
  });
  /**
   * GET /api/files?page=<n>&pageSize=<n>
   * Returns one page of file records from `fileService.getFilesPaginated`.
   * `page` defaults to 1 and `pageSize` to 100; missing, non-numeric, zero or
   * negative values fall back to / are clamped to a minimum of 1.
   *
   * Responses:
   *   200 { success: true, data: <files>, pagination }
   *   500 { success: false, error }
   */
  app.get('/api/files', async (req, res) => {
    try {
      // Explicit radix avoids surprises with prefixed input; the clamp keeps
      // negative values from reaching the service layer.
      const requestedPage = Number.parseInt(req.query.page as string, 10);
      const requestedPageSize = Number.parseInt(req.query.pageSize as string, 10);
      const page = Math.max(1, requestedPage || 1);
      const pageSize = Math.max(1, requestedPageSize || 100);

      const result = await fileService.getFilesPaginated(page, pageSize);

      // Uniform envelope: the file array is returned directly under `data`.
      res.json({
        success: true,
        data: result.files,
        pagination: result.pagination,
      });
    } catch (error) {
      console.error('Get files error:', error);
      const reason = error instanceof Error ? error.message : String(error);
      res.status(500).json({
        success: false,
        error: `Failed to get files: ${reason}`,
      });
    }
  });
  /**
   * POST /api/files/:id/check-md5
   * Recomputes the MD5 of the stored file and compares it with the MD5 saved
   * on its record.
   *
   * Responses:
   *   200 { isChanged, currentMD5, originalMD5, file }
   *   400 { error } when `:id` is not an integer
   *   404 { error } when no record exists for the id
   *   500 { error } on any other failure
   */
  app.post('/api/files/:id/check-md5', async (req, res) => {
    try {
      const fileId = Number.parseInt(req.params.id, 10);
      // Reject non-numeric ids instead of passing NaN to the service.
      if (Number.isNaN(fileId)) {
        return res.status(400).json({ error: 'Invalid file ID' });
      }

      const file = await fileService.getFileById(fileId);
      if (!file) {
        return res.status(404).json({ error: 'File not found' });
      }

      const currentMD5 = await calculateFileMD5(file.filePath);
      const isChanged = currentMD5 !== file.md5;

      res.json({
        isChanged,
        currentMD5,
        originalMD5: file.md5,
        file,
      });
    } catch (error) {
      console.error('MD5 check error:', error);
      res.status(500).json({ error: 'MD5 check failed' });
    }
  });
  /**
   * PUT /api/files/:id/update-md5
   * Body: { md5: string }
   * Overwrites the MD5 stored for a file via `fileService.updateFileMD5`.
   *
   * Responses:
   *   200 { success: true }
   *   400 { error } when `:id` is not an integer or `md5` is missing/blank
   *   500 { error } on any other failure
   */
  app.put('/api/files/:id/update-md5', async (req, res) => {
    try {
      const fileId = Number.parseInt(req.params.id, 10);
      if (Number.isNaN(fileId)) {
        return res.status(400).json({ error: 'Invalid file ID' });
      }

      // `req.body` may be undefined when no parseable body was sent.
      const md5 = req.body?.md5;
      // Refuse to overwrite the stored hash with undefined or an empty value.
      if (typeof md5 !== 'string' || md5.trim() === '') {
        return res.status(400).json({ error: 'MD5 is required' });
      }

      await fileService.updateFileMD5(fileId, md5);
      res.json({ success: true });
    } catch (error) {
      console.error('Update MD5 error:', error);
      res.status(500).json({ error: 'Update failed' });
    }
  });
  /**
   * POST /api/ocr/recognize
   * Body: { fileId, config }
   * Looks up the file record and runs `onnxOcrManager.recognizeImage` on its
   * stored path, returning selected fields of the recognition result.
   */
  app.post('/api/ocr/recognize', async (req, res) => {
    try {
      const { fileId, config } = req.body;
      if (!fileId) {
        res.status(400).json({ error: 'File ID is required' });
        return;
      }

      const record = await fileService.getFileById(parseInt(fileId));
      if (!record) {
        res.status(404).json({ error: 'File not found' });
        return;
      }

      console.log(`开始ONNX OCR识别: ${record.originalName}`);

      // Delegate the actual recognition to the ONNX OCR manager.
      const ocrResult = await onnxOcrManager.recognizeImage(record.filePath, config);

      const payload = {
        textBlocks: ocrResult.textBlocks,
        totalPages: ocrResult.totalPages,
        processingTime: ocrResult.processingTime,
        confidence: ocrResult.confidence,
        // The ONNX path does not provide a processed image for now.
        processedImageUrl: '',
        imageInfo: ocrResult.imageInfo,
        isOffline: ocrResult.isOffline,
      };
      res.json({ success: true, data: payload });
    } catch (error) {
      console.error('ONNX OCR识别失败:', error);
      res.status(500).json({ error: `OCR识别失败: ${error.message}` });
    }
  });
  /**
   * Runs an image through sharp (grayscale, normalize, sharpen, PNG) and
   * writes it to `processed-<fileId>.png` inside `processedDir`.
   *
   * @param fileId - id used to build the output file name and returned URL
   * @param processedImagePath - path of the image to read
   * @returns the API URL of the processed image, or '' when saving failed
   */
  async function saveProcessedImage(fileId: number, processedImagePath: string): Promise<string> {
    try {
      const outputFile = path.join(processedDir, `processed-${fileId}.png`);

      const pipeline = sharp(processedImagePath)
        .grayscale()
        .normalize()
        .sharpen()
        .png();
      await pipeline.toFile(outputFile);

      return `/api/files/${fileId}/processed-image`;
    } catch (error) {
      // Failure is logged and reported to the caller as an empty URL.
      console.error('保存处理后的图片失败:', error);
      return '';
    }
  }
  /**
   * GET /api/files/:id/processed-image
   * Sends `processed-<id>.png` from `processedDir` as image/png, or 404 when
   * that file does not exist.
   */
  app.get('/api/files/:id/processed-image', async (req, res) => {
    try {
      const imageId = parseInt(req.params.id);
      const imageFile = path.join(processedDir, `processed-${imageId}.png`);

      const isPresent = fs.existsSync(imageFile);
      if (!isPresent) {
        res.status(404).json({ error: 'Processed image not found' });
        return;
      }

      // Replace the default JSON content type before sending the image.
      res.setHeader('Content-Type', 'image/png');
      res.sendFile(path.resolve(imageFile));
    } catch (error) {
      console.error('Get processed image error:', error);
      res.status(500).json({ error: 'Failed to get processed image' });
    }
  });
  /**
   * POST /api/ocr/save-result
   * Body: { fileId, ocrData }
   * Stores an OCR result for a file through `fileService.saveOcrResult`.
   */
  app.post('/api/ocr/save-result', async (req, res) => {
    try {
      const { fileId, ocrData } = req.body;

      // Both fields are mandatory.
      const hasRequiredFields = fileId && ocrData;
      if (!hasRequiredFields) {
        res.status(400).json({ error: '文件ID和OCR数据是必需的' });
        return;
      }

      await fileService.saveOcrResult(parseInt(fileId), ocrData);
      res.json({ success: true });
    } catch (error) {
      console.error('保存OCR结果失败:', error);
      res.status(500).json({ error: `保存OCR结果失败: ${error.message}` });
    }
  });
  /**
   * GET /api/ocr/result/:fileId
   * Returns the stored OCR data for a file. When nothing is stored the
   * response is still HTTP 200 but carries `success: false`.
   */
  app.get('/api/ocr/result/:fileId', async (req, res) => {
    try {
      const requestedId = parseInt(req.params.fileId);
      const stored = await fileService.getOcrResult(requestedId);

      if (!stored) {
        res.json({
          success: false,
          error: '未找到OCR结果',
        });
        return;
      }

      res.json({
        success: true,
        data: stored.ocr_data,
      });
    } catch (error) {
      console.error('获取OCR结果失败:', error);
      res.status(500).json({ error: `获取OCR结果失败: ${error.message}` });
    }
  });
  /**
   * PUT /api/ocr/update-text
   * Body: { fileId, textBlocks }
   * Saves manually corrected OCR text through `fileService.updateOcrText`.
   */
  app.put('/api/ocr/update-text', async (req, res) => {
    try {
      const { fileId, textBlocks } = req.body;

      // Both fields are mandatory.
      const hasRequiredFields = fileId && textBlocks;
      if (!hasRequiredFields) {
        res.status(400).json({ error: '文件ID和文本数据是必需的' });
        return;
      }

      await fileService.updateOcrText(parseInt(fileId), textBlocks);
      res.json({ success: true });
    } catch (error) {
      console.error('更新OCR文本失败:', error);
      res.status(500).json({ error: `更新OCR文本失败: ${error.message}` });
    }
  });
  /**
   * GET /api/files/:id/preview
   * Sends the stored file itself, using the MIME type saved on its record.
   * Answers 404 when the record is missing or the file is not on disk.
   */
  app.get('/api/files/:id/preview', async (req, res) => {
    try {
      const requestedId = parseInt(req.params.id);
      const record = await fileService.getFileById(requestedId);
      if (!record) {
        res.status(404).json({ error: 'File not found' });
        return;
      }

      // The record can outlive the file it points to.
      const isOnDisk = fs.existsSync(record.filePath);
      if (!isOnDisk) {
        res.status(404).json({ error: 'File not found on disk' });
        return;
      }

      // Replace the default JSON content type with the recorded MIME type.
      res.setHeader('Content-Type', record.mimeType);
      const absolutePath = path.resolve(record.filePath);
      res.sendFile(absolutePath);
    } catch (error) {
      console.error('File preview error:', error);
      res.status(500).json({ error: 'Failed to get file preview' });
    }
  });
  /**
   * POST /api/ocr/batch-recognize
   * Body: { fileIds: array, config }
   * Resolves each id to its stored file path (ids without a record are
   * skipped) and passes the collected paths to `onnxOcrManager.batchRecognize`.
   */
  app.post('/api/ocr/batch-recognize', async (req, res) => {
    try {
      const { fileIds, config } = req.body;
      // Array.isArray is false for every falsy value, so it covers both checks.
      if (!Array.isArray(fileIds)) {
        res.status(400).json({ error: 'File IDs array is required' });
        return;
      }

      // Look the records up one at a time, in the order the ids were given.
      const filePaths = [];
      for (const rawId of fileIds) {
        const record = await fileService.getFileById(parseInt(rawId));
        if (!record) {
          continue;
        }
        filePaths.push(record.filePath);
      }

      const batchResults = await onnxOcrManager.batchRecognize(filePaths, config);
      res.json({
        success: true,
        data: batchResults,
      });
    } catch (error) {
      console.error('批量ONNX OCR识别失败:', error);
      res.status(500).json({ error: `批量识别失败: ${error.message}` });
    }
  });
  /**
   * GET /api/ocr/processed-image?path=<file path>
   * Sends a pre-processed image as image/png.
   *
   * SECURITY: the path comes straight from the client, so it is resolved and
   * must lie inside one of this server's own working directories. Without
   * that check the route would serve any file readable by the process.
   * NOTE(review): assumes the OCR manager writes its pre-processed images
   * under `processedDir`, `tempDir` or `uploadDir` — if it writes elsewhere,
   * add that directory to `allowedRoots`.
   *
   * Responses:
   *   200 image/png
   *   400 { error } when `path` is missing, not a single string, or malformed
   *   403 { error } when the path is outside the allowed directories
   *   404 { error } when the file does not exist
   *   500 { error } on any other failure
   */
  app.get('/api/ocr/processed-image', async (req, res) => {
    try {
      const imagePath = req.query.path;
      // A repeated `path` parameter arrives as an array; only a string is valid.
      if (!imagePath || typeof imagePath !== 'string') {
        return res.status(400).json({ error: '图片路径是必需的' });
      }

      // Kept for clients that encode the path twice; malformed escapes are a
      // client error rather than a server failure.
      let decodedPath;
      try {
        decodedPath = decodeURIComponent(imagePath);
      } catch {
        return res.status(400).json({ error: '图片路径格式无效' });
      }

      const resolvedPath = path.resolve(decodedPath);
      const allowedRoots = [processedDir, tempDir, uploadDir];
      const isInsideAllowedRoot = allowedRoots.some((root) => {
        const relative = path.relative(root, resolvedPath);
        // Inside the root means: not the root itself, not climbing out of it,
        // and not on a different drive (absolute result on Windows).
        return (
          relative !== '' &&
          relative !== '..' &&
          !relative.startsWith(`..${path.sep}`) &&
          !path.isAbsolute(relative)
        );
      });
      if (!isInsideAllowedRoot) {
        return res.status(403).json({ error: '无权访问该路径' });
      }

      if (!fse.existsSync(resolvedPath)) {
        return res.status(404).json({ error: '预处理图片不存在' });
      }

      res.setHeader('Content-Type', 'image/png');
      res.sendFile(resolvedPath);
    } catch (error) {
      console.error('获取预处理图片失败:', error);
      res.status(500).json({ error: '获取预处理图片失败' });
    }
  });
  /**
   * POST /api/ocr/debug-recognition
   * Body: { fileId, boxIndex }
   * Debug hook: checks that the file exists and logs which text box was
   * requested. No recognition is performed here.
   */
  app.post('/api/ocr/debug-recognition', async (req, res) => {
    try {
      const { fileId, boxIndex } = req.body;

      // `boxIndex` is compared against undefined so that index 0 is accepted.
      const isMissingInput = !fileId || boxIndex === undefined;
      if (isMissingInput) {
        res.status(400).json({ error: '文件ID和框索引是必需的' });
        return;
      }

      const record = await fileService.getFileById(parseInt(fileId));
      if (!record) {
        res.status(404).json({ error: '文件未找到' });
        return;
      }

      // Placeholder for more specific debugging logic.
      console.log(`🔧 调试文件 ${fileId} 的第 ${boxIndex} 个文本框`);

      res.json({
        success: true,
        message: '调试信息已输出到控制台',
      });
    } catch (error) {
      console.error('调试失败:', error);
      res.status(500).json({ error: `调试失败: ${error.message}` });
    }
  });
  /**
   * GET /api/ocr/status
   * Returns whatever `onnxOcrManager.getStatus()` reports.
   */
  app.get('/api/ocr/status', async (req, res) => {
    try {
      const engineStatus = onnxOcrManager.getStatus();
      res.json({ success: true, data: engineStatus });
    } catch (error) {
      console.error('获取ONNX OCR状态失败:', error);
      res.status(500).json({ error: `获取状态失败: ${error.message}` });
    }
  });
  /**
   * GET /api/files/:id/thumbnail
   * Generates a JPEG thumbnail (at most 100x100, never enlarged) for an image
   * file and sends it. If generation fails, the original file is sent instead.
   *
   * Responses:
   *   200 image/jpeg thumbnail, or the original file as a fallback
   *   400 { error } when the file is not an image
   *   404 { error } when no record exists for the id
   *   500 { error } when the record lookup itself fails
   */
  app.get('/api/files/:id/thumbnail', async (req, res) => {
    // Declared outside `try` so the fallback in `catch` can reach the record.
    let file;
    try {
      const fileId = Number.parseInt(req.params.id, 10);
      // The lookup is inside `try` so a rejected promise produces a response
      // instead of an unhandled rejection.
      file = await fileService.getFileById(fileId);
      if (!file) {
        return res.status(404).json({ error: 'File not found' });
      }

      // Thumbnails are only generated for images.
      if (!file.mimeType.startsWith('image/')) {
        return res.status(400).json({ error: 'Not an image file' });
      }

      const thumbnailPath = path.join(tempDir, `thumbnail-${fileId}.jpg`);

      await sharp(file.filePath)
        .resize(100, 100, {
          fit: 'inside',
          withoutEnlargement: true,
        })
        .jpeg({ quality: 80 })
        .toFile(thumbnailPath);

      // The global middleware already set a JSON Content-Type and sendFile
      // does not replace an existing one, so set the image type explicitly.
      res.setHeader('Content-Type', 'image/jpeg');
      res.sendFile(path.resolve(thumbnailPath));
    } catch (error) {
      console.error('Thumbnail generation error:', error);
      if (res.headersSent) {
        return;
      }
      // Without a record there is no original file to fall back to.
      if (!file) {
        return res.status(500).json({ error: 'Failed to get thumbnail' });
      }
      // Fall back to the original file with its own content type; when none
      // is recorded, drop the JSON default so sendFile can infer one.
      if (file.mimeType) {
        res.setHeader('Content-Type', file.mimeType);
      } else {
        res.removeHeader('Content-Type');
      }
      res.sendFile(path.resolve(file.filePath));
    }
  });
  /**
   * GET /api/health
   * Liveness probe: reports a fixed status, the current time in ISO-8601
   * form, and the service name.
   */
  app.get('/api/health', (req, res) => {
    const timestamp = new Date().toISOString();
    res.json({ status: 'OK', timestamp, service: 'file-management-api' });
  });
  /**
   * Initialises the ONNX OCR engine and logs progress. A failure is logged
   * and not rethrown, so the returned promise never rejects because of it.
   */
  async function initializeOcrEngine() {
    console.log('正在初始化ONNX OCR引擎...');
    try {
      await onnxOcrManager.initialize();
      console.log('ONNX OCR引擎初始化完成');
    } catch (error) {
      console.error('ONNX OCR引擎初始化失败:', error);
    }
  }
  /**
   * Starts OCR engine initialisation without waiting for it, then begins
   * listening for HTTP requests on `PORT`.
   */
  function startServer() {
    // Not awaited: the server starts listening while the engine loads.
    initializeOcrEngine();

    const onListening = () => {
      console.log(`Server running on http://localhost:${PORT}`);
    };
    app.listen(PORT, onListening);
  }

  export { startServer };