Electron-vue3-ts-offline/scripts/download-ppocrv5.js

197 行
7.4 KiB
JavaScript

2025-11-13 16:34:41 +08:00
// scripts/download-ppocrv5.js
import fs from 'fs-extra';
import path from 'path';
import { fileURLToPath } from 'url';
import { createRequire } from 'module';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);
/**
 * Downloads the PP-OCRv5 model files (detection, recognition, classification
 * and the character dictionary) into `<cwd>/models/ppocrv5/<subdir>/`.
 *
 * Each file is first written to a staging directory (`<cwd>/temp/downloads`),
 * validated, and only then moved to its final location, so a failed or
 * invalid download never leaves a file at the final path.
 */
class PPOCRv5Downloader {
  constructor() {
    this.modelDir = path.join(process.cwd(), 'models', 'ppocrv5');
    this.tempDir = path.join(process.cwd(), 'temp', 'downloads');
    // Download links for the PP-OCRv5 model files, keyed by model type.
    this.modelUrls = {
      detection: {
        url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_det_infer.onnx',
        filename: 'ch_PP-OCRv5_det_infer.onnx'
      },
      recognition: {
        url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_rec_infer.onnx',
        filename: 'ch_PP-OCRv5_rec_infer.onnx'
      },
      classification: {
        url: 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.onnx',
        filename: 'ch_ppocr_mobile_v2.0_cls_infer.onnx'
      },
      keys: {
        url: 'https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/ppocr/utils/ppocr_keys_v1.txt',
        filename: 'ppocr_keys_v1.txt'
      }
    };
  }

  /**
   * Downloads every entry of `this.modelUrls` in parallel and reports the
   * outcome on the console. Failures are reported (together with manual
   * download instructions) rather than thrown.
   *
   * @returns {Promise<boolean>} `true` when every file is in place,
   *   `false` when at least one download failed.
   */
  async downloadModels() {
    console.log('🚀 开始下载 PP-OCRv5 模型...');
    console.log('📝 PP-OCRv5 特性:');
    console.log(' - 更高的文本检测准确率');
    console.log(' - 更好的小文本识别能力');
    console.log(' - 优化的模型结构');
    console.log(' - 完全离线运行\n');
    try {
      // Create the directory layout before any download starts.
      await this.createDirectories();

      const entries = Object.entries(this.modelUrls);
      const totalCount = entries.length;
      let successCount = 0;

      // allSettled (not all): wait for every download to finish before
      // reporting, so the error summary is not printed while other
      // downloads are still running in the background.
      const results = await Promise.allSettled(
        entries.map(async ([type, info]) => {
          try {
            await this.downloadFile(type, info);
            successCount++;
            console.log(`${this.getTypeName(type)} 下载完成 (${successCount}/${totalCount})`);
          } catch (error) {
            console.log(`${this.getTypeName(type)} 下载失败: ${error.message}`);
            throw error;
          }
        })
      );

      const firstFailure = results.find((result) => result.status === 'rejected');
      if (firstFailure) {
        throw firstFailure.reason;
      }

      console.log('\n🎉 所有模型下载完成!');
      this.displayModelInfo();
      return true;
    } catch (error) {
      console.error('\n❌ 下载过程中出现错误:', error.message);
      await this.provideAlternativeSources();
      return false;
    }
  }

  /**
   * Ensures the model directory, its per-type subdirectories and the
   * staging directory all exist.
   *
   * @returns {Promise<void>}
   */
  async createDirectories() {
    const dirs = [
      this.modelDir,
      path.join(this.modelDir, 'det'),
      path.join(this.modelDir, 'rec'),
      path.join(this.modelDir, 'cls'),
      path.join(this.modelDir, 'keys'),
      this.tempDir
    ];
    for (const dir of dirs) {
      await fs.ensureDir(dir);
    }
    console.log('📁 目录结构创建完成');
  }

  /**
   * Downloads one model file unless a plausibly complete copy already exists.
   *
   * @param {string} type - Key of `this.modelUrls` (e.g. `'detection'`).
   * @param {{url: string, filename: string}} info - Source URL and target file name.
   * @returns {Promise<void>}
   * @throws {Error} On a non-2xx HTTP status, a payload smaller than the
   *   minimum size for `type`, or a failed content validation.
   */
  async downloadFile(type, info) {
    const targetPath = this.getTargetPath(type, info.filename);
    const minSize = this.getMinFileSize(type);

    // Skip the download when a large-enough file is already present.
    if (await fs.pathExists(targetPath)) {
      const stats = await fs.stat(targetPath);
      // Same boundary as the post-download check below, so a file that was
      // accepted on download is also accepted as "already present".
      if (stats.size >= minSize) {
        console.log(` ⏭️ ${this.getTypeName(type)} 已存在,跳过下载`);
        return;
      }
    }

    console.log(` 📥 下载 ${this.getTypeName(type)}...`);
    const { default: fetch } = await import('node-fetch');
    const response = await fetch(info.url);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    // arrayBuffer() instead of the deprecated node-fetch `buffer()` helper.
    const buffer = Buffer.from(await response.arrayBuffer());

    // Reject truncated payloads and small error pages.
    if (buffer.length < minSize) {
      throw new Error(`文件大小异常: ${(buffer.length / 1024 / 1024).toFixed(2)} MB`);
    }

    // Stage, validate, then move: an invalid file must never land at
    // targetPath, otherwise the size-only "already exists" check above
    // would skip it on every later run.
    const stagedPath = path.join(this.tempDir, `${info.filename}.part`);
    try {
      await fs.outputFile(stagedPath, buffer);
      await this.validateFile(type, stagedPath);
      await fs.move(stagedPath, targetPath, { overwrite: true });
    } catch (error) {
      // Best-effort cleanup; the original error is the one worth reporting.
      await fs.remove(stagedPath).catch(() => {});
      throw error;
    }
  }

  /**
   * Resolves the final on-disk path for a model file.
   *
   * @param {string} type - Model type key.
   * @param {string} filename - File name inside the type's subdirectory.
   * @returns {string} Absolute path below `this.modelDir`.
   */
  getTargetPath(type, filename) {
    const dirs = {
      detection: path.join(this.modelDir, 'det'),
      recognition: path.join(this.modelDir, 'rec'),
      classification: path.join(this.modelDir, 'cls'),
      keys: path.join(this.modelDir, 'keys')
    };
    return path.join(dirs[type], filename);
  }

  /**
   * Returns the display name used in console output for a model type.
   *
   * @param {string} type - Model type key.
   * @returns {string|undefined} Display name, or `undefined` for an unknown type.
   */
  getTypeName(type) {
    const names = {
      detection: '检测模型 (PP-OCRv5 Det)',
      recognition: '识别模型 (PP-OCRv5 Rec)',
      classification: '分类模型 (Cls)',
      keys: '字符集文件'
    };
    return names[type];
  }

  /**
   * Returns the minimum acceptable file size, in bytes, for a model type.
   *
   * @param {string} type - Model type key.
   * @returns {number|undefined} Size threshold, or `undefined` for an unknown type.
   */
  getMinFileSize(type) {
    const sizes = {
      detection: 2000000, // 2MB
      recognition: 8000000, // 8MB
      classification: 1000000, // 1MB
      // NOTE(review): the upstream ppocr_keys_v1.txt appears to be smaller
      // than 50 KB; confirm this threshold does not reject the genuine file.
      keys: 50000 // 50KB
    };
    return sizes[type];
  }

  /**
   * Validates a downloaded file and logs its size. Only the `keys` type has a
   * content check: it must contain at least 5000 non-blank lines.
   *
   * @param {string} type - Model type key.
   * @param {string} filePath - Path of the file to inspect.
   * @returns {Promise<void>}
   * @throws {Error} When the character dictionary has too few lines.
   */
  async validateFile(type, filePath) {
    const stats = await fs.stat(filePath);
    if (type === 'keys') {
      const content = await fs.readFile(filePath, 'utf8');
      const lines = content.split('\n').filter(line => line.trim());
      if (lines.length < 5000) {
        throw new Error('字符集文件不完整');
      }
    }
    console.log(` 📊 文件大小: ${(stats.size / 1024 / 1024).toFixed(2)} MB`);
  }

  /**
   * Prints where each model file was stored and how to start the app.
   * Paths are derived from `this.modelUrls` so they cannot drift from the
   * files that were actually downloaded.
   */
  displayModelInfo() {
    const { detection, recognition, classification, keys } = this.modelUrls;
    console.log('\n📂 模型文件位置:');
    console.log(` 🎯 检测模型: ${this.getTargetPath('detection', detection.filename)}`);
    console.log(` 🔤 识别模型: ${this.getTargetPath('recognition', recognition.filename)}`);
    console.log(` 🧭 分类模型: ${this.getTargetPath('classification', classification.filename)}`);
    console.log(` 📝 字符集: ${this.getTargetPath('keys', keys.filename)}`);
    console.log('\n🚀 使用命令:');
    console.log(' yarn dev # 启动应用');
  }

  /**
   * Prints manual download instructions (source URLs and the expected
   * directory layout), taken from `this.modelUrls`.
   *
   * @returns {Promise<void>}
   */
  async provideAlternativeSources() {
    const { detection, recognition, classification, keys } = this.modelUrls;
    console.log('\n💡 备用下载方案:');
    console.log(' 1. 手动下载 PP-OCRv5 模型:');
    console.log(` - 检测模型: ${detection.url}`);
    console.log(` - 识别模型: ${recognition.url}`);
    console.log(` - 分类模型: ${classification.url}`);
    console.log(` - 字符集: ${keys.url}`);
    console.log('\n 2. 将文件放置到以下目录:');
    console.log(` ${this.modelDir}/`);
    console.log(` ├── det/${detection.filename}`);
    console.log(` ├── rec/${recognition.filename}`);
    console.log(` ├── cls/${classification.filename}`);
    console.log(` └── keys/${keys.filename}`);
  }
}
// Run the download as soon as the script is executed; anything that escapes
// downloadModels' own error handling is printed to stderr.
const downloader = new PPOCRv5Downloader();
downloader.downloadModels().catch((error) => {
  console.error(error);
});