const sqlite3 = require('sqlite3') const path = require('path') const { open } = require('sqlite') const fs = require('fs-extra') const dbPath = path.join(process.cwd(), 'database/files.db') // 确保数据库目录存在 const dbDir = path.dirname(dbPath) fs.ensureDirSync(dbDir) async function initDatabase() { const db = await open({ filename: dbPath, driver: sqlite3.Database }) // 设置数据库编码为 UTF-8 await db.exec('PRAGMA encoding = "UTF-8"') await db.exec('PRAGMA foreign_keys = ON') await db.exec(` CREATE TABLE IF NOT EXISTS files ( id INTEGER PRIMARY KEY AUTOINCREMENT, original_name TEXT NOT NULL, file_name TEXT NOT NULL, file_path TEXT NOT NULL, file_size INTEGER NOT NULL, mime_type TEXT NOT NULL, md5 TEXT NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ) `) // 新增 OCR 结果表 await db.exec(` CREATE TABLE IF NOT EXISTS ocr_results ( id INTEGER PRIMARY KEY AUTOINCREMENT, file_id INTEGER NOT NULL, ocr_data TEXT NOT NULL, confidence REAL, processing_time INTEGER, recognized_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE, UNIQUE(file_id) ) `) await db.close() } class FileService { async getDb() { const db = await open({ filename: dbPath, driver: sqlite3.Database }) // 确保每次连接都使用 UTF-8 await db.exec('PRAGMA encoding = "UTF-8"') return db } async createFile(fileData) { const db = await this.getDb() // 确保文件名正确存储 const result = await db.run( `INSERT INTO files (original_name, file_name, file_path, file_size, mime_type, md5) VALUES (?, ?, ?, ?, ?, ?)`, [ fileData.originalName, fileData.fileName, fileData.filePath, fileData.fileSize, fileData.mimeType, fileData.md5 ] ) const file = await db.get( 'SELECT * FROM files WHERE id = ?', result.lastID ) await db.close() return this.mapDatabaseToFileRecord(file) } async getFilesPaginated(page, pageSize) { const db = await this.getDb() const offset = (page - 1) * pageSize const files = await db.all( 'SELECT * FROM files ORDER BY created_at DESC LIMIT ? OFFSET ?', [pageSize, offset] ) const totalResult = await db.get('SELECT COUNT(*) as count FROM files') const total = totalResult.count await db.close() return { files: files.map(file => this.mapDatabaseToFileRecord(file)), pagination: { page, pageSize, total, totalPages: Math.ceil(total / pageSize) } } } async getFileById(id) { const db = await this.getDb() const file = await db.get('SELECT * FROM files WHERE id = ?', [id]) await db.close() return file ? this.mapDatabaseToFileRecord(file) : null } async updateFileMD5(id, md5) { const db = await this.getDb() await db.run( 'UPDATE files SET md5 = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?', [md5, id] ) await db.close() } mapDatabaseToFileRecord(dbFile) { // 确保从数据库读取时正确处理编码 let originalName = dbFile.original_name try { // 尝试解码,如果已经是正确编码则不会影响 originalName = decodeURIComponent(originalName) } catch (error) { console.warn('文件名解码失败,使用原值:', error) } return { id: dbFile.id, originalName: originalName, fileName: dbFile.file_name, filePath: dbFile.file_path, fileSize: dbFile.file_size, mimeType: dbFile.mime_type, md5: dbFile.md5, createdAt: dbFile.created_at, updatedAt: dbFile.updated_at } } async saveOcrResult(fileId, ocrData) { const db = await this.getDb() // 将 OCR 数据转为 JSON 字符串存储 const ocrDataJson = JSON.stringify(ocrData) try { // 尝试更新已存在的记录 const result = await db.run( `UPDATE ocr_results SET ocr_data = ?, confidence = ?, processing_time = ?, updated_at = CURRENT_TIMESTAMP WHERE file_id = ?`, [ocrDataJson, ocrData.confidence, ocrData.processingTime, fileId] ) // 如果没有更新任何行,则插入新记录 if (result.changes === 0) { await db.run( `INSERT INTO ocr_results (file_id, ocr_data, confidence, processing_time) VALUES (?, ?, ?, ?)`, [fileId, ocrDataJson, ocrData.confidence, ocrData.processingTime] ) } await db.close() return { success: true } } catch (error) { await db.close() throw error } } async getOcrResult(fileId) { const db = await this.getDb() const result = await db.get( 'SELECT * FROM ocr_results WHERE file_id = ?', [fileId] ) await db.close() if (result) { return { ...result, ocr_data: JSON.parse(result.ocr_data) } } return null } async updateOcrText(fileId, newTextBlocks) { const db = await this.getDb() const existingResult = await this.getOcrResult(fileId) if (!existingResult) { throw new Error('没有找到OCR结果') } // 更新文本块 const updatedOcrData = { ...existingResult.ocr_data, textBlocks: newTextBlocks, updatedAt: new Date().toISOString(), manuallyCorrected: true } const ocrDataJson = JSON.stringify(updatedOcrData) await db.run( 'UPDATE ocr_results SET ocr_data = ?, updated_at = CURRENT_TIMESTAMP WHERE file_id = ?', [ocrDataJson, fileId] ) await db.close() return { success: true } } } module.exports = { initDatabase, FileService }