// Repository: xuqinmin12/Electron-vue3-ts-offline
							// scripts/download-ppocrv5.js
import fs from 'fs-extra';
import path from 'path';
import { fileURLToPath } from 'url';
import { createRequire } from 'module';

// ES modules have no built-in __dirname / require, so rebuild both from this
// module's own URL.
const currentModuleUrl = import.meta.url;
const currentModulePath = fileURLToPath(currentModuleUrl);
const __dirname = path.dirname(currentModulePath);
const require = createRequire(currentModuleUrl);

/**
 * Downloads the OCR model files listed in `modelUrls` (detection, recognition,
 * classification and the character dictionary) into
 * `<cwd>/models/ppocrv5/<det|rec|cls|keys>/`.
 *
 * Files are first written to `<cwd>/temp/downloads`, validated there, and only
 * then moved to their final location, so a truncated or invalid download never
 * ends up where a later run would mistake it for a good file.
 */
class PPOCRv5Downloader {
    constructor() {
        this.modelDir = path.join(process.cwd(), 'models', 'ppocrv5');
        this.tempDir = path.join(process.cwd(), 'temp', 'downloads');

        // Download links for each model artifact, keyed by model type.
        this.modelUrls = {
            detection: {
                url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_det_infer.onnx',
                filename: 'ch_PP-OCRv5_det_infer.onnx'
            },
            recognition: {
                url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_rec_infer.onnx',
                filename: 'ch_PP-OCRv5_rec_infer.onnx'
            },
            classification: {
                url: 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.onnx',
                filename: 'ch_ppocr_mobile_v2.0_cls_infer.onnx'
            },
            keys: {
                url: 'https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/ppocr/utils/ppocr_keys_v1.txt',
                filename: 'ppocr_keys_v1.txt'
            }
        };
    }

    /**
     * Entry point: creates the directory layout, downloads every configured
     * file in parallel, and prints either the final file locations or manual
     * download instructions if anything failed.
     *
     * Errors are reported on the console and not re-thrown.
     *
     * @returns {Promise<void>}
     */
    async downloadModels() {
        console.log('🚀 开始下载 PP-OCRv5 模型...');
        console.log('📝 PP-OCRv5 特性:');
        console.log('   - 更高的文本检测准确率');
        console.log('   - 更好的小文本识别能力');
        console.log('   - 优化的模型结构');
        console.log('   - 完全离线运行\n');

        try {
            // Create the directory layout first.
            await this.createDirectories();

            let successCount = 0;
            const totalCount = Object.keys(this.modelUrls).length;

            // Download everything in parallel. allSettled (rather than all)
            // waits for every download to finish, so the failure summary is
            // not interleaved with progress output from still-running tasks.
            const outcomes = await Promise.allSettled(
                Object.entries(this.modelUrls).map(async ([type, info]) => {
                    try {
                        await this.downloadFile(type, info);
                        successCount++;
                        console.log(`   ✅ ${this.getTypeName(type)} 下载完成 (${successCount}/${totalCount})`);
                    } catch (error) {
                        console.log(`   ❌ ${this.getTypeName(type)} 下载失败: ${error.message}`);
                        throw error;
                    }
                })
            );

            const firstFailure = outcomes.find((outcome) => outcome.status === 'rejected');
            if (firstFailure) {
                throw firstFailure.reason;
            }

            console.log('\n🎉 所有模型下载完成！');
            this.displayModelInfo();

        } catch (error) {
            console.error('\n❌ 下载过程中出现错误:', error.message);
            await this.provideAlternativeSources();
        }
    }

    /**
     * Ensures the model directory, its per-type subdirectories and the
     * temporary download directory exist.
     *
     * @returns {Promise<void>}
     */
    async createDirectories() {
        const dirs = [
            this.modelDir,
            path.join(this.modelDir, 'det'),
            path.join(this.modelDir, 'rec'),
            path.join(this.modelDir, 'cls'),
            path.join(this.modelDir, 'keys'),
            this.tempDir
        ];

        for (const dir of dirs) {
            await fs.ensureDir(dir);
        }
        console.log('📁 目录结构创建完成');
    }

    /**
     * Downloads one file unless a sufficiently large copy already exists.
     *
     * @param {string} type - Model type key (`detection`, `recognition`,
     *   `classification` or `keys`).
     * @param {{url: string, filename: string}} info - Source URL and target file name.
     * @returns {Promise<void>}
     * @throws {Error} On a non-OK HTTP response, an undersized body, or a
     *   failed validation.
     */
    async downloadFile(type, info) {
        const targetPath = this.getTargetPath(type, info.filename);
        const minSize = this.getMinFileSize(type);

        // Skip the download when a plausible copy is already in place. The
        // comparison mirrors the post-download check below, so a file that is
        // exactly the minimum size is not re-downloaded on every run.
        if (await fs.pathExists(targetPath)) {
            const stats = await fs.stat(targetPath);
            if (stats.size >= minSize) {
                console.log(`   ⏭️ ${this.getTypeName(type)} 已存在，跳过下载`);
                return;
            }
        }

        console.log(`   📥 下载 ${this.getTypeName(type)}...`);

        const fetch = await import('node-fetch');
        const response = await fetch.default(info.url);

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }

        // arrayBuffer() replaces the deprecated node-fetch buffer() helper.
        const buffer = Buffer.from(await response.arrayBuffer());

        // Reject obviously truncated or error-page responses.
        if (buffer.length < minSize) {
            throw new Error(`文件大小异常: ${(buffer.length / 1024 / 1024).toFixed(2)} MB`);
        }

        // Stage the file in the temp directory and move it into place only
        // after it validates, so a bad download is never left at targetPath.
        const stagedPath = path.join(this.tempDir, `${info.filename}.part`);
        await fs.ensureDir(this.tempDir);
        await fs.ensureDir(path.dirname(targetPath));
        await fs.writeFile(stagedPath, buffer);

        try {
            await this.validateFile(type, stagedPath);
            await fs.move(stagedPath, targetPath, { overwrite: true });
        } catch (error) {
            await fs.remove(stagedPath);
            throw error;
        }
    }

    /**
     * Resolves the final on-disk path for a model file.
     *
     * @param {string} type - Model type key.
     * @param {string} filename - File name inside the type's subdirectory.
     * @returns {string} Absolute path under `modelDir`.
     * @throws {TypeError} If `type` is not a known model type.
     */
    getTargetPath(type, filename) {
        const dirs = {
            detection: path.join(this.modelDir, 'det'),
            recognition: path.join(this.modelDir, 'rec'),
            classification: path.join(this.modelDir, 'cls'),
            keys: path.join(this.modelDir, 'keys')
        };
        const dir = dirs[type];
        if (typeof dir !== 'string') {
            throw new TypeError(`Unknown model type: ${String(type)}`);
        }
        return path.join(dir, filename);
    }

    /**
     * Returns the display name used in progress messages.
     *
     * @param {string} type - Model type key.
     * @returns {string} The display name, or `type` itself when it is unknown.
     */
    getTypeName(type) {
        const names = {
            detection: '检测模型 (PP-OCRv5 Det)',
            recognition: '识别模型 (PP-OCRv5 Rec)',
            classification: '分类模型 (Cls)',
            keys: '字符集文件'
        };
        return names[type] ?? type;
    }

    /**
     * Returns the minimum plausible size, in bytes, for a file of the given type.
     *
     * @param {string} type - Model type key.
     * @returns {number|undefined} Size floor in bytes; `undefined` for unknown types.
     */
    getMinFileSize(type) {
        const sizes = {
            detection: 2000000,    // 2MB
            recognition: 8000000,  // 8MB
            classification: 1000000, // 1MB
            // 10KB. The dictionary is believed to hold one character per line
            // (roughly 6.6k lines, about 26 KB - TODO confirm against the
            // upstream file); the previous 50 KB floor would reject it. The
            // line-count check in validateFile remains the real integrity test.
            keys: 10000
        };
        return sizes[type];
    }

    /**
     * Validates a downloaded file and logs its size.
     *
     * For the `keys` type the file must contain at least 5000 non-blank lines;
     * other types are only measured.
     *
     * @param {string} type - Model type key.
     * @param {string} filePath - Path of the file to inspect.
     * @returns {Promise<void>}
     * @throws {Error} If the dictionary file has too few lines.
     */
    async validateFile(type, filePath) {
        const stats = await fs.stat(filePath);

        if (type === 'keys') {
            const content = await fs.readFile(filePath, 'utf8');
            const lines = content.split('\n').filter(line => line.trim());
            if (lines.length < 5000) {
                throw new Error('字符集文件不完整');
            }
        }

        console.log(`      📊 文件大小: ${(stats.size / 1024 / 1024).toFixed(2)} MB`);
    }

    /**
     * Prints where each file was stored and how to start the app.
     * Paths are derived from `modelUrls` so they cannot drift from the
     * locations the downloader actually writes to.
     */
    displayModelInfo() {
        const labels = {
            detection: '🎯 检测模型',
            recognition: '🔤 识别模型',
            classification: '🧭 分类模型',
            keys: '📝 字符集'
        };

        console.log('\n📂 模型文件位置:');
        for (const [type, info] of Object.entries(this.modelUrls)) {
            console.log(`   ${labels[type] ?? type}: ${this.getTargetPath(type, info.filename)}`);
        }

        console.log('\n🚀 使用命令:');
        console.log('   yarn dev  # 启动应用');
    }

    /**
     * Prints manual download instructions: the source URL of every file and
     * the directory layout the files must be placed in.
     *
     * @returns {Promise<void>}
     */
    async provideAlternativeSources() {
        const labels = {
            detection: '检测模型',
            recognition: '识别模型',
            classification: '分类模型',
            keys: '字符集'
        };
        const entries = Object.entries(this.modelUrls);

        console.log('\n💡 备用下载方案:');
        console.log('   1. 手动下载 PP-OCRv5 模型:');
        for (const [type, info] of entries) {
            console.log(`      - ${labels[type] ?? type}: ${info.url}`);
        }

        console.log('\n   2. 将文件放置到以下目录:');
        console.log(`      ${this.modelDir}/`);
        entries.forEach(([type, info], index) => {
            // Always print forward slashes in the tree, whatever the platform.
            const relativePath = path
                .relative(this.modelDir, this.getTargetPath(type, info.filename))
                .split(path.sep)
                .join('/');
            const branch = index === entries.length - 1 ? '└──' : '├──';
            console.log(`      ${branch} ${relativePath}`);
        });
    }
}

// Script entry point: run the download and print any error that escapes it.
const downloader = new PPOCRv5Downloader();
downloader
    .downloadModels()
    .catch((error) => console.error(error));