235 行
6.9 KiB
JavaScript
235 行
6.9 KiB
JavaScript
|
|
const sqlite3 = require('sqlite3')
|
||
|
|
const path = require('path')
|
||
|
|
const { open } = require('sqlite')
|
||
|
|
const fs = require('fs-extra')
|
||
|
|
|
||
|
|
const dbPath = path.join(process.cwd(), 'database/files.db')
|
||
|
|
|
||
|
|
// 确保数据库目录存在
|
||
|
|
const dbDir = path.dirname(dbPath)
|
||
|
|
fs.ensureDirSync(dbDir)
|
||
|
|
|
||
|
|
async function initDatabase() {
|
||
|
|
const db = await open({
|
||
|
|
filename: dbPath,
|
||
|
|
driver: sqlite3.Database
|
||
|
|
})
|
||
|
|
|
||
|
|
// 设置数据库编码为 UTF-8
|
||
|
|
await db.exec('PRAGMA encoding = "UTF-8"')
|
||
|
|
await db.exec('PRAGMA foreign_keys = ON')
|
||
|
|
|
||
|
|
await db.exec(`
|
||
|
|
CREATE TABLE IF NOT EXISTS files (
|
||
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
|
|
original_name TEXT NOT NULL,
|
||
|
|
file_name TEXT NOT NULL,
|
||
|
|
file_path TEXT NOT NULL,
|
||
|
|
file_size INTEGER NOT NULL,
|
||
|
|
mime_type TEXT NOT NULL,
|
||
|
|
md5 TEXT NOT NULL,
|
||
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||
|
|
)
|
||
|
|
`)
|
||
|
|
|
||
|
|
// 新增 OCR 结果表
|
||
|
|
await db.exec(`
|
||
|
|
CREATE TABLE IF NOT EXISTS ocr_results (
|
||
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
|
|
file_id INTEGER NOT NULL,
|
||
|
|
ocr_data TEXT NOT NULL,
|
||
|
|
confidence REAL,
|
||
|
|
processing_time INTEGER,
|
||
|
|
recognized_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||
|
|
FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE,
|
||
|
|
UNIQUE(file_id)
|
||
|
|
)
|
||
|
|
`)
|
||
|
|
|
||
|
|
await db.close()
|
||
|
|
}
|
||
|
|
|
||
|
|
class FileService {
|
||
|
|
async getDb() {
|
||
|
|
const db = await open({
|
||
|
|
filename: dbPath,
|
||
|
|
driver: sqlite3.Database
|
||
|
|
})
|
||
|
|
|
||
|
|
// 确保每次连接都使用 UTF-8
|
||
|
|
await db.exec('PRAGMA encoding = "UTF-8"')
|
||
|
|
return db
|
||
|
|
}
|
||
|
|
|
||
|
|
async createFile(fileData) {
|
||
|
|
const db = await this.getDb()
|
||
|
|
|
||
|
|
// 确保文件名正确存储
|
||
|
|
const result = await db.run(
|
||
|
|
`INSERT INTO files (original_name, file_name, file_path, file_size, mime_type, md5)
|
||
|
|
VALUES (?, ?, ?, ?, ?, ?)`,
|
||
|
|
[
|
||
|
|
fileData.originalName,
|
||
|
|
fileData.fileName,
|
||
|
|
fileData.filePath,
|
||
|
|
fileData.fileSize,
|
||
|
|
fileData.mimeType,
|
||
|
|
fileData.md5
|
||
|
|
]
|
||
|
|
)
|
||
|
|
|
||
|
|
const file = await db.get(
|
||
|
|
'SELECT * FROM files WHERE id = ?',
|
||
|
|
result.lastID
|
||
|
|
)
|
||
|
|
|
||
|
|
await db.close()
|
||
|
|
|
||
|
|
return this.mapDatabaseToFileRecord(file)
|
||
|
|
}
|
||
|
|
|
||
|
|
async getFilesPaginated(page, pageSize) {
|
||
|
|
const db = await this.getDb()
|
||
|
|
const offset = (page - 1) * pageSize
|
||
|
|
|
||
|
|
const files = await db.all(
|
||
|
|
'SELECT * FROM files ORDER BY created_at DESC LIMIT ? OFFSET ?',
|
||
|
|
[pageSize, offset]
|
||
|
|
)
|
||
|
|
|
||
|
|
const totalResult = await db.get('SELECT COUNT(*) as count FROM files')
|
||
|
|
const total = totalResult.count
|
||
|
|
|
||
|
|
await db.close()
|
||
|
|
|
||
|
|
return {
|
||
|
|
files: files.map(file => this.mapDatabaseToFileRecord(file)),
|
||
|
|
pagination: {
|
||
|
|
page,
|
||
|
|
pageSize,
|
||
|
|
total,
|
||
|
|
totalPages: Math.ceil(total / pageSize)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
async getFileById(id) {
|
||
|
|
const db = await this.getDb()
|
||
|
|
const file = await db.get('SELECT * FROM files WHERE id = ?', [id])
|
||
|
|
await db.close()
|
||
|
|
return file ? this.mapDatabaseToFileRecord(file) : null
|
||
|
|
}
|
||
|
|
|
||
|
|
async updateFileMD5(id, md5) {
|
||
|
|
const db = await this.getDb()
|
||
|
|
await db.run(
|
||
|
|
'UPDATE files SET md5 = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?',
|
||
|
|
[md5, id]
|
||
|
|
)
|
||
|
|
await db.close()
|
||
|
|
}
|
||
|
|
|
||
|
|
mapDatabaseToFileRecord(dbFile) {
|
||
|
|
// 确保从数据库读取时正确处理编码
|
||
|
|
let originalName = dbFile.original_name
|
||
|
|
try {
|
||
|
|
// 尝试解码,如果已经是正确编码则不会影响
|
||
|
|
originalName = decodeURIComponent(originalName)
|
||
|
|
} catch (error) {
|
||
|
|
console.warn('文件名解码失败,使用原值:', error)
|
||
|
|
}
|
||
|
|
|
||
|
|
return {
|
||
|
|
id: dbFile.id,
|
||
|
|
originalName: originalName,
|
||
|
|
fileName: dbFile.file_name,
|
||
|
|
filePath: dbFile.file_path,
|
||
|
|
fileSize: dbFile.file_size,
|
||
|
|
mimeType: dbFile.mime_type,
|
||
|
|
md5: dbFile.md5,
|
||
|
|
createdAt: dbFile.created_at,
|
||
|
|
updatedAt: dbFile.updated_at
|
||
|
|
}
|
||
|
|
}
|
||
|
|
async saveOcrResult(fileId, ocrData) {
|
||
|
|
const db = await this.getDb()
|
||
|
|
|
||
|
|
// 将 OCR 数据转为 JSON 字符串存储
|
||
|
|
const ocrDataJson = JSON.stringify(ocrData)
|
||
|
|
|
||
|
|
try {
|
||
|
|
// 尝试更新已存在的记录
|
||
|
|
const result = await db.run(
|
||
|
|
`UPDATE ocr_results SET ocr_data = ?, confidence = ?, processing_time = ?, updated_at = CURRENT_TIMESTAMP
|
||
|
|
WHERE file_id = ?`,
|
||
|
|
[ocrDataJson, ocrData.confidence, ocrData.processingTime, fileId]
|
||
|
|
)
|
||
|
|
|
||
|
|
// 如果没有更新任何行,则插入新记录
|
||
|
|
if (result.changes === 0) {
|
||
|
|
await db.run(
|
||
|
|
`INSERT INTO ocr_results (file_id, ocr_data, confidence, processing_time)
|
||
|
|
VALUES (?, ?, ?, ?)`,
|
||
|
|
[fileId, ocrDataJson, ocrData.confidence, ocrData.processingTime]
|
||
|
|
)
|
||
|
|
}
|
||
|
|
|
||
|
|
await db.close()
|
||
|
|
return { success: true }
|
||
|
|
} catch (error) {
|
||
|
|
await db.close()
|
||
|
|
throw error
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
async getOcrResult(fileId) {
|
||
|
|
const db = await this.getDb()
|
||
|
|
|
||
|
|
const result = await db.get(
|
||
|
|
'SELECT * FROM ocr_results WHERE file_id = ?',
|
||
|
|
[fileId]
|
||
|
|
)
|
||
|
|
|
||
|
|
await db.close()
|
||
|
|
|
||
|
|
if (result) {
|
||
|
|
return {
|
||
|
|
...result,
|
||
|
|
ocr_data: JSON.parse(result.ocr_data)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return null
|
||
|
|
}
|
||
|
|
|
||
|
|
async updateOcrText(fileId, newTextBlocks) {
|
||
|
|
const db = await this.getDb()
|
||
|
|
|
||
|
|
const existingResult = await this.getOcrResult(fileId)
|
||
|
|
if (!existingResult) {
|
||
|
|
throw new Error('没有找到OCR结果')
|
||
|
|
}
|
||
|
|
|
||
|
|
// 更新文本块
|
||
|
|
const updatedOcrData = {
|
||
|
|
...existingResult.ocr_data,
|
||
|
|
textBlocks: newTextBlocks,
|
||
|
|
updatedAt: new Date().toISOString(),
|
||
|
|
manuallyCorrected: true
|
||
|
|
}
|
||
|
|
|
||
|
|
const ocrDataJson = JSON.stringify(updatedOcrData)
|
||
|
|
|
||
|
|
await db.run(
|
||
|
|
'UPDATE ocr_results SET ocr_data = ?, updated_at = CURRENT_TIMESTAMP WHERE file_id = ?',
|
||
|
|
[ocrDataJson, fileId]
|
||
|
|
)
|
||
|
|
|
||
|
|
await db.close()
|
||
|
|
return { success: true }
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
module.exports = { initDatabase, FileService }
|