database.js 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. const sqlite3 = require('sqlite3')
  2. const path = require('path')
  3. const { open } = require('sqlite')
  4. const fs = require('fs-extra')
  5. const dbPath = path.join(process.cwd(), 'database/files.db')
  6. // 确保数据库目录存在
  7. const dbDir = path.dirname(dbPath)
  8. fs.ensureDirSync(dbDir)
  9. async function initDatabase() {
  10. const db = await open({
  11. filename: dbPath,
  12. driver: sqlite3.Database
  13. })
  14. // 设置数据库编码为 UTF-8
  15. await db.exec('PRAGMA encoding = "UTF-8"')
  16. await db.exec('PRAGMA foreign_keys = ON')
  17. await db.exec(`
  18. CREATE TABLE IF NOT EXISTS files (
  19. id INTEGER PRIMARY KEY AUTOINCREMENT,
  20. original_name TEXT NOT NULL,
  21. file_name TEXT NOT NULL,
  22. file_path TEXT NOT NULL,
  23. file_size INTEGER NOT NULL,
  24. mime_type TEXT NOT NULL,
  25. md5 TEXT NOT NULL,
  26. created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  27. updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
  28. )
  29. `)
  30. // 新增 OCR 结果表
  31. await db.exec(`
  32. CREATE TABLE IF NOT EXISTS ocr_results (
  33. id INTEGER PRIMARY KEY AUTOINCREMENT,
  34. file_id INTEGER NOT NULL,
  35. ocr_data TEXT NOT NULL,
  36. confidence REAL,
  37. processing_time INTEGER,
  38. recognized_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  39. updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  40. FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE,
  41. UNIQUE(file_id)
  42. )
  43. `)
  44. await db.close()
  45. }
  46. class FileService {
  47. async getDb() {
  48. const db = await open({
  49. filename: dbPath,
  50. driver: sqlite3.Database
  51. })
  52. // 确保每次连接都使用 UTF-8
  53. await db.exec('PRAGMA encoding = "UTF-8"')
  54. return db
  55. }
  56. async createFile(fileData) {
  57. const db = await this.getDb()
  58. // 确保文件名正确存储
  59. const result = await db.run(
  60. `INSERT INTO files (original_name, file_name, file_path, file_size, mime_type, md5)
  61. VALUES (?, ?, ?, ?, ?, ?)`,
  62. [
  63. fileData.originalName,
  64. fileData.fileName,
  65. fileData.filePath,
  66. fileData.fileSize,
  67. fileData.mimeType,
  68. fileData.md5
  69. ]
  70. )
  71. const file = await db.get(
  72. 'SELECT * FROM files WHERE id = ?',
  73. result.lastID
  74. )
  75. await db.close()
  76. return this.mapDatabaseToFileRecord(file)
  77. }
  78. async getFilesPaginated(page, pageSize) {
  79. const db = await this.getDb()
  80. const offset = (page - 1) * pageSize
  81. const files = await db.all(
  82. 'SELECT * FROM files ORDER BY created_at DESC LIMIT ? OFFSET ?',
  83. [pageSize, offset]
  84. )
  85. const totalResult = await db.get('SELECT COUNT(*) as count FROM files')
  86. const total = totalResult.count
  87. await db.close()
  88. return {
  89. files: files.map(file => this.mapDatabaseToFileRecord(file)),
  90. pagination: {
  91. page,
  92. pageSize,
  93. total,
  94. totalPages: Math.ceil(total / pageSize)
  95. }
  96. }
  97. }
  98. async getFileById(id) {
  99. const db = await this.getDb()
  100. const file = await db.get('SELECT * FROM files WHERE id = ?', [id])
  101. await db.close()
  102. return file ? this.mapDatabaseToFileRecord(file) : null
  103. }
  104. async updateFileMD5(id, md5) {
  105. const db = await this.getDb()
  106. await db.run(
  107. 'UPDATE files SET md5 = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?',
  108. [md5, id]
  109. )
  110. await db.close()
  111. }
  112. mapDatabaseToFileRecord(dbFile) {
  113. // 确保从数据库读取时正确处理编码
  114. let originalName = dbFile.original_name
  115. try {
  116. // 尝试解码,如果已经是正确编码则不会影响
  117. originalName = decodeURIComponent(originalName)
  118. } catch (error) {
  119. console.warn('文件名解码失败,使用原值:', error)
  120. }
  121. return {
  122. id: dbFile.id,
  123. originalName: originalName,
  124. fileName: dbFile.file_name,
  125. filePath: dbFile.file_path,
  126. fileSize: dbFile.file_size,
  127. mimeType: dbFile.mime_type,
  128. md5: dbFile.md5,
  129. createdAt: dbFile.created_at,
  130. updatedAt: dbFile.updated_at
  131. }
  132. }
  133. async saveOcrResult(fileId, ocrData) {
  134. const db = await this.getDb()
  135. // 将 OCR 数据转为 JSON 字符串存储
  136. const ocrDataJson = JSON.stringify(ocrData)
  137. try {
  138. // 尝试更新已存在的记录
  139. const result = await db.run(
  140. `UPDATE ocr_results SET ocr_data = ?, confidence = ?, processing_time = ?, updated_at = CURRENT_TIMESTAMP
  141. WHERE file_id = ?`,
  142. [ocrDataJson, ocrData.confidence, ocrData.processingTime, fileId]
  143. )
  144. // 如果没有更新任何行,则插入新记录
  145. if (result.changes === 0) {
  146. await db.run(
  147. `INSERT INTO ocr_results (file_id, ocr_data, confidence, processing_time)
  148. VALUES (?, ?, ?, ?)`,
  149. [fileId, ocrDataJson, ocrData.confidence, ocrData.processingTime]
  150. )
  151. }
  152. await db.close()
  153. return { success: true }
  154. } catch (error) {
  155. await db.close()
  156. throw error
  157. }
  158. }
  159. async getOcrResult(fileId) {
  160. const db = await this.getDb()
  161. const result = await db.get(
  162. 'SELECT * FROM ocr_results WHERE file_id = ?',
  163. [fileId]
  164. )
  165. await db.close()
  166. if (result) {
  167. return {
  168. ...result,
  169. ocr_data: JSON.parse(result.ocr_data)
  170. }
  171. }
  172. return null
  173. }
  174. async updateOcrText(fileId, newTextBlocks) {
  175. const db = await this.getDb()
  176. const existingResult = await this.getOcrResult(fileId)
  177. if (!existingResult) {
  178. throw new Error('没有找到OCR结果')
  179. }
  180. // 更新文本块
  181. const updatedOcrData = {
  182. ...existingResult.ocr_data,
  183. textBlocks: newTextBlocks,
  184. updatedAt: new Date().toISOString(),
  185. manuallyCorrected: true
  186. }
  187. const ocrDataJson = JSON.stringify(updatedOcrData)
  188. await db.run(
  189. 'UPDATE ocr_results SET ocr_data = ?, updated_at = CURRENT_TIMESTAMP WHERE file_id = ?',
  190. [ocrDataJson, fileId]
  191. )
  192. await db.close()
  193. return { success: true }
  194. }
  195. }
  196. module.exports = { initDatabase, FileService }