| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235 |
- const sqlite3 = require('sqlite3')
- const path = require('path')
- const { open } = require('sqlite')
- const fs = require('fs-extra')
- const dbPath = path.join(process.cwd(), 'database/files.db')
- // 确保数据库目录存在
- const dbDir = path.dirname(dbPath)
- fs.ensureDirSync(dbDir)
- async function initDatabase() {
- const db = await open({
- filename: dbPath,
- driver: sqlite3.Database
- })
- // 设置数据库编码为 UTF-8
- await db.exec('PRAGMA encoding = "UTF-8"')
- await db.exec('PRAGMA foreign_keys = ON')
- await db.exec(`
- CREATE TABLE IF NOT EXISTS files (
- id INTEGER PRIMARY KEY AUTOINCREMENT,
- original_name TEXT NOT NULL,
- file_name TEXT NOT NULL,
- file_path TEXT NOT NULL,
- file_size INTEGER NOT NULL,
- mime_type TEXT NOT NULL,
- md5 TEXT NOT NULL,
- created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
- updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
- )
- `)
- // 新增 OCR 结果表
- await db.exec(`
- CREATE TABLE IF NOT EXISTS ocr_results (
- id INTEGER PRIMARY KEY AUTOINCREMENT,
- file_id INTEGER NOT NULL,
- ocr_data TEXT NOT NULL,
- confidence REAL,
- processing_time INTEGER,
- recognized_at DATETIME DEFAULT CURRENT_TIMESTAMP,
- updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
- FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE,
- UNIQUE(file_id)
- )
- `)
- await db.close()
- }
- class FileService {
- async getDb() {
- const db = await open({
- filename: dbPath,
- driver: sqlite3.Database
- })
- // 确保每次连接都使用 UTF-8
- await db.exec('PRAGMA encoding = "UTF-8"')
- return db
- }
- async createFile(fileData) {
- const db = await this.getDb()
- // 确保文件名正确存储
- const result = await db.run(
- `INSERT INTO files (original_name, file_name, file_path, file_size, mime_type, md5)
- VALUES (?, ?, ?, ?, ?, ?)`,
- [
- fileData.originalName,
- fileData.fileName,
- fileData.filePath,
- fileData.fileSize,
- fileData.mimeType,
- fileData.md5
- ]
- )
- const file = await db.get(
- 'SELECT * FROM files WHERE id = ?',
- result.lastID
- )
- await db.close()
- return this.mapDatabaseToFileRecord(file)
- }
- async getFilesPaginated(page, pageSize) {
- const db = await this.getDb()
- const offset = (page - 1) * pageSize
- const files = await db.all(
- 'SELECT * FROM files ORDER BY created_at DESC LIMIT ? OFFSET ?',
- [pageSize, offset]
- )
- const totalResult = await db.get('SELECT COUNT(*) as count FROM files')
- const total = totalResult.count
- await db.close()
- return {
- files: files.map(file => this.mapDatabaseToFileRecord(file)),
- pagination: {
- page,
- pageSize,
- total,
- totalPages: Math.ceil(total / pageSize)
- }
- }
- }
- async getFileById(id) {
- const db = await this.getDb()
- const file = await db.get('SELECT * FROM files WHERE id = ?', [id])
- await db.close()
- return file ? this.mapDatabaseToFileRecord(file) : null
- }
- async updateFileMD5(id, md5) {
- const db = await this.getDb()
- await db.run(
- 'UPDATE files SET md5 = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?',
- [md5, id]
- )
- await db.close()
- }
- mapDatabaseToFileRecord(dbFile) {
- // 确保从数据库读取时正确处理编码
- let originalName = dbFile.original_name
- try {
- // 尝试解码,如果已经是正确编码则不会影响
- originalName = decodeURIComponent(originalName)
- } catch (error) {
- console.warn('文件名解码失败,使用原值:', error)
- }
- return {
- id: dbFile.id,
- originalName: originalName,
- fileName: dbFile.file_name,
- filePath: dbFile.file_path,
- fileSize: dbFile.file_size,
- mimeType: dbFile.mime_type,
- md5: dbFile.md5,
- createdAt: dbFile.created_at,
- updatedAt: dbFile.updated_at
- }
- }
- async saveOcrResult(fileId, ocrData) {
- const db = await this.getDb()
- // 将 OCR 数据转为 JSON 字符串存储
- const ocrDataJson = JSON.stringify(ocrData)
- try {
- // 尝试更新已存在的记录
- const result = await db.run(
- `UPDATE ocr_results SET ocr_data = ?, confidence = ?, processing_time = ?, updated_at = CURRENT_TIMESTAMP
- WHERE file_id = ?`,
- [ocrDataJson, ocrData.confidence, ocrData.processingTime, fileId]
- )
- // 如果没有更新任何行,则插入新记录
- if (result.changes === 0) {
- await db.run(
- `INSERT INTO ocr_results (file_id, ocr_data, confidence, processing_time)
- VALUES (?, ?, ?, ?)`,
- [fileId, ocrDataJson, ocrData.confidence, ocrData.processingTime]
- )
- }
- await db.close()
- return { success: true }
- } catch (error) {
- await db.close()
- throw error
- }
- }
- async getOcrResult(fileId) {
- const db = await this.getDb()
- const result = await db.get(
- 'SELECT * FROM ocr_results WHERE file_id = ?',
- [fileId]
- )
- await db.close()
- if (result) {
- return {
- ...result,
- ocr_data: JSON.parse(result.ocr_data)
- }
- }
- return null
- }
- async updateOcrText(fileId, newTextBlocks) {
- const db = await this.getDb()
- const existingResult = await this.getOcrResult(fileId)
- if (!existingResult) {
- throw new Error('没有找到OCR结果')
- }
- // 更新文本块
- const updatedOcrData = {
- ...existingResult.ocr_data,
- textBlocks: newTextBlocks,
- updatedAt: new Date().toISOString(),
- manuallyCorrected: true
- }
- const ocrDataJson = JSON.stringify(updatedOcrData)
- await db.run(
- 'UPDATE ocr_results SET ocr_data = ?, updated_at = CURRENT_TIMESTAMP WHERE file_id = ?',
- [ocrDataJson, fileId]
- )
- await db.close()
- return { success: true }
- }
- }
- module.exports = { initDatabase, FileService }
|