// scripts/download-ppocrv5.js
import { Buffer } from 'buffer';
import { createRequire } from 'module';
import path from 'path';
import { fileURLToPath } from 'url';

import fs from 'fs-extra';
// ES modules define neither `__dirname` nor `require`; rebuild both from
// this module's own URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);
/**
 * Downloads the PP-OCRv5 ONNX models and the character dictionary into
 * `<cwd>/models/ppocrv5`, skipping files that are already present.
 */
class PPOCRv5Downloader {
  constructor() {
    this.modelDir = path.join(process.cwd(), 'models', 'ppocrv5');
    // Staging area: files are written and validated here before being moved
    // into `modelDir`.
    this.tempDir = path.join(process.cwd(), 'temp', 'downloads');

    // Official PP-OCRv5 model download links.
    this.modelUrls = {
      detection: {
        url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_det_infer.onnx',
        filename: 'ch_PP-OCRv5_det_infer.onnx',
      },
      recognition: {
        url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_rec_infer.onnx',
        filename: 'ch_PP-OCRv5_rec_infer.onnx',
      },
      classification: {
        url: 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.onnx',
        filename: 'ch_ppocr_mobile_v2.0_cls_infer.onnx',
      },
      keys: {
        url: 'https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/ppocr/utils/ppocr_keys_v1.txt',
        filename: 'ppocr_keys_v1.txt',
      },
    };
  }

  /**
   * Creates the directory layout and downloads every entry of `modelUrls`
   * in parallel. Failures are logged and followed by manual-download
   * instructions; this method does not reject for download errors.
   *
   * @returns {Promise<boolean>} `true` when every file is in place,
   *   `false` when at least one step failed.
   */
  async downloadModels() {
    console.log('🚀 开始下载 PP-OCRv5 模型...');
    console.log('📝 PP-OCRv5 特性:');
    console.log(' - 更高的文本检测准确率');
    console.log(' - 更好的小文本识别能力');
    console.log(' - 优化的模型结构');
    console.log(' - 完全离线运行\n');

    try {
      // Create the directory layout.
      await this.createDirectories();

      const entries = Object.entries(this.modelUrls);
      const totalCount = entries.length;
      let successCount = 0;

      // Download everything in parallel. `allSettled` (rather than `all`)
      // lets every download finish before the outcome is reported, so the
      // fallback instructions are not printed while transfers are still
      // running.
      const results = await Promise.allSettled(
        entries.map(async ([type, info]) => {
          try {
            await this.downloadFile(type, info);
            successCount++;
            console.log(` ✅ ${this.getTypeName(type)} 下载完成 (${successCount}/${totalCount})`);
          } catch (error) {
            console.log(` ❌ ${this.getTypeName(type)} 下载失败: ${error.message}`);
            throw error;
          }
        }),
      );

      const firstFailure = results.find((result) => result.status === 'rejected');
      if (firstFailure) {
        throw firstFailure.reason;
      }

      console.log('\n🎉 所有模型下载完成!');
      this.displayModelInfo();
      return true;
    } catch (error) {
      console.error('\n❌ 下载过程中出现错误:', error.message);
      await this.provideAlternativeSources();
      return false;
    }
  }

  /**
   * Ensures the model directory, its per-type subdirectories and the
   * temporary download directory all exist.
   *
   * @returns {Promise<void>}
   */
  async createDirectories() {
    const dirs = [
      this.modelDir,
      path.join(this.modelDir, 'det'),
      path.join(this.modelDir, 'rec'),
      path.join(this.modelDir, 'cls'),
      path.join(this.modelDir, 'keys'),
      this.tempDir,
    ];

    for (const dir of dirs) {
      await fs.ensureDir(dir);
    }

    console.log('📁 目录结构创建完成');
  }

  /**
   * Downloads one file unless a large-enough copy already exists at its
   * target path. The payload is staged in `tempDir`, validated there, and
   * only then moved to the target, so a failed or invalid download never
   * lands at the final location.
   *
   * @param {string} type - Key of `modelUrls` (`detection`, `recognition`,
   *   `classification` or `keys`).
   * @param {{url: string, filename: string}} info - Download descriptor.
   * @returns {Promise<void>}
   * @throws {Error} On a non-OK HTTP status, an undersized payload, or a
   *   failed validation.
   */
  async downloadFile(type, info) {
    const targetPath = this.getTargetPath(type, info.filename);
    const minSize = this.getMinFileSize(type);

    // Skip the download when a plausible copy is already present. The bound
    // is inclusive so it matches the post-download size check below.
    if (await fs.pathExists(targetPath)) {
      const stats = await fs.stat(targetPath);
      if (stats.size >= minSize) {
        console.log(` ⏭️ ${this.getTypeName(type)} 已存在,跳过下载`);
        return;
      }
    }

    console.log(` 📥 下载 ${this.getTypeName(type)}...`);

    const { default: fetch } = await import('node-fetch');
    const response = await fetch(info.url);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    // `response.buffer()` is deprecated in node-fetch v3; `arrayBuffer()`
    // is available in both v2 and v3.
    const buffer = Buffer.from(await response.arrayBuffer());

    // Verify the payload size.
    if (buffer.length < minSize) {
      throw new Error(`文件大小异常: ${(buffer.length / 1024 / 1024).toFixed(2)} MB`);
    }

    const tempPath = path.join(this.tempDir, `${info.filename}.part`);
    try {
      await fs.ensureDir(this.tempDir);
      await fs.writeFile(tempPath, buffer);
      // Validate the staged file before it becomes visible at the target.
      await this.validateFile(type, tempPath);
      await fs.move(tempPath, targetPath, { overwrite: true });
    } catch (error) {
      // Best-effort cleanup; the original failure is what the caller must see.
      await fs.remove(tempPath).catch(() => {});
      throw error;
    }
  }

  /**
   * Resolves where a file of the given type is stored.
   *
   * @param {string} type - Key of `modelUrls`.
   * @param {string} filename - File name inside the type's subdirectory.
   * @returns {string} Path below `modelDir`.
   */
  getTargetPath(type, filename) {
    const dirs = {
      detection: path.join(this.modelDir, 'det'),
      recognition: path.join(this.modelDir, 'rec'),
      classification: path.join(this.modelDir, 'cls'),
      keys: path.join(this.modelDir, 'keys'),
    };
    return path.join(dirs[type], filename);
  }

  /**
   * Returns the label used in log output for a file type.
   *
   * @param {string} type - Key of `modelUrls`.
   * @returns {string|undefined} The label, or `undefined` for an unknown type.
   */
  getTypeName(type) {
    const names = {
      detection: '检测模型 (PP-OCRv5 Det)',
      recognition: '识别模型 (PP-OCRv5 Rec)',
      classification: '分类模型 (Cls)',
      keys: '字符集文件',
    };
    return names[type];
  }

  /**
   * Returns the minimum size in bytes a file of the given type must have to
   * be accepted.
   *
   * @param {string} type - Key of `modelUrls`.
   * @returns {number|undefined} The threshold, or `undefined` for an unknown type.
   */
  getMinFileSize(type) {
    const sizes = {
      detection: 2000000, // 2MB
      recognition: 8000000, // 8MB
      classification: 1000000, // 1MB
      // NOTE(review): validateFile accepts a dictionary with 5000+ lines; if
      // those lines are single characters the real file may be smaller than
      // this threshold — confirm against the actual download size.
      keys: 50000, // 50KB
    };
    return sizes[type];
  }

  /**
   * Checks a downloaded file and logs its size. For the `keys` type the
   * file must contain at least 5000 non-blank lines.
   *
   * @param {string} type - Key of `modelUrls`.
   * @param {string} filePath - File to inspect.
   * @returns {Promise<void>}
   * @throws {Error} When the character dictionary has too few lines.
   */
  async validateFile(type, filePath) {
    const stats = await fs.stat(filePath);

    if (type === 'keys') {
      const content = await fs.readFile(filePath, 'utf8');
      const lines = content.split('\n').filter((line) => line.trim());
      if (lines.length < 5000) {
        throw new Error('字符集文件不完整');
      }
    }

    console.log(` 📊 文件大小: ${(stats.size / 1024 / 1024).toFixed(2)} MB`);
  }

  /**
   * Prints where each file was stored and how to start the application.
   * Paths are derived from `modelUrls` so they cannot drift from the
   * locations the downloader actually writes to.
   */
  displayModelInfo() {
    const locate = (type) => this.getTargetPath(type, this.modelUrls[type].filename);

    console.log('\n📂 模型文件位置:');
    console.log(` 🎯 检测模型: ${locate('detection')}`);
    console.log(` 🔤 识别模型: ${locate('recognition')}`);
    console.log(` 🧭 分类模型: ${locate('classification')}`);
    console.log(` 📝 字符集: ${locate('keys')}`);
    console.log('\n🚀 使用命令:');
    console.log(' yarn dev # 启动应用');
  }

  /**
   * Prints manual download instructions: the source URL of every file and
   * the directory layout they must be placed in. URLs and file names come
   * from `modelUrls`.
   *
   * @returns {Promise<void>}
   */
  async provideAlternativeSources() {
    const { detection, recognition, classification, keys } = this.modelUrls;

    console.log('\n💡 备用下载方案:');
    console.log(' 1. 手动下载 PP-OCRv5 模型:');
    console.log(` - 检测模型: ${detection.url}`);
    console.log(` - 识别模型: ${recognition.url}`);
    console.log(` - 分类模型: ${classification.url}`);
    console.log(` - 字符集: ${keys.url}`);
    console.log('\n 2. 将文件放置到以下目录:');
    console.log(` ${this.modelDir}/`);
    console.log(` ├── det/${detection.filename}`);
    console.log(` ├── rec/${recognition.filename}`);
    console.log(` ├── cls/${classification.filename}`);
    console.log(` └── keys/${keys.filename}`);
  }
}
// Run the download when this script is executed.
const downloader = new PPOCRv5Downloader();
downloader.downloadModels().catch((error) => {
  // An unexpected rejection is reported and also reflected in the exit
  // status, so callers of the script can detect the failure.
  console.error(error);
  process.exitCode = 1;
});