Electron-vue3-ts-offline/scripts/download-ppocrv5.js

// scripts/download-ppocrv5.js
import fs from 'fs-extra';
import path from 'path';
import { fileURLToPath } from 'url';
import { createRequire } from 'module';

// ES modules have no built-in `__dirname`; derive the directory of this
// module file from its `import.meta.url`.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// CommonJS-style `require` function created for this module's URL.
// NOTE(review): neither `__dirname` nor `require` is referenced by the code
// visible in this file — confirm nothing else relies on them before removing.
const require = createRequire(import.meta.url);

/**
 * Downloads the PP-OCRv5 ONNX models and the character dictionary into
 * `<cwd>/models/ppocrv5/{det,rec,cls,keys}`.
 *
 * Each file is fetched into memory, staged under `<cwd>/temp/downloads`,
 * validated, and only then moved to its final location, so a failed or
 * incomplete download never leaves behind a file that a later run would
 * mistake for a finished one.
 */
class PPOCRv5Downloader {
    /**
     * Per-type metadata shared by every method, keyed by the same type names
     * used in `modelUrls`:
     *  - `subdir`:    folder under `modelDir` that holds the file
     *  - `name`:      label used in progress messages
     *  - `icon` / `shortName`: label used in the summary and manual instructions
     *  - `minSize`:   smallest byte count accepted as a plausible file
     *
     * NOTE(review): the `keys` floor was lowered from 50000 to 20000 bytes.
     * `validateFile` accepts a dictionary with 5000 non-empty lines, and the
     * upstream dictionary appears to be ~6.6k one-character lines (~26 KB),
     * which the old 50 KB floor would always reject — confirm against the
     * real file.
     */
    static TYPE_INFO = Object.freeze({
        detection: {
            subdir: 'det',
            name: '检测模型 (PP-OCRv5 Det)',
            icon: '🎯',
            shortName: '检测模型',
            minSize: 2000000 // 2MB
        },
        recognition: {
            subdir: 'rec',
            name: '识别模型 (PP-OCRv5 Rec)',
            icon: '🔤',
            shortName: '识别模型',
            minSize: 8000000 // 8MB
        },
        classification: {
            subdir: 'cls',
            name: '分类模型 (Cls)',
            icon: '🧭',
            shortName: '分类模型',
            minSize: 1000000 // 1MB
        },
        keys: {
            subdir: 'keys',
            name: '字符集文件',
            icon: '📝',
            shortName: '字符集',
            minSize: 20000 // 20KB
        }
    });

    constructor() {
        this.modelDir = path.join(process.cwd(), 'models', 'ppocrv5');
        this.tempDir = path.join(process.cwd(), 'temp', 'downloads');

        // Download locations for the PP-OCRv5 models and the dictionary.
        this.modelUrls = {
            detection: {
                url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_det_infer.onnx',
                filename: 'ch_PP-OCRv5_det_infer.onnx'
            },
            recognition: {
                url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_rec_infer.onnx',
                filename: 'ch_PP-OCRv5_rec_infer.onnx'
            },
            classification: {
                url: 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.onnx',
                filename: 'ch_ppocr_mobile_v2.0_cls_infer.onnx'
            },
            keys: {
                url: 'https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/ppocr/utils/ppocr_keys_v1.txt',
                filename: 'ppocr_keys_v1.txt'
            }
        };
    }

    /**
     * Looks up the metadata entry for a type, ignoring inherited keys such
     * as `constructor`.
     *
     * @param {string} type - Key of `TYPE_INFO`.
     * @returns {object|undefined} The metadata entry, or `undefined` if unknown.
     */
    #typeInfo(type) {
        const table = PPOCRv5Downloader.TYPE_INFO;
        return Object.prototype.hasOwnProperty.call(table, type) ? table[type] : undefined;
    }

    /**
     * Creates the directories and downloads every entry of `modelUrls` in
     * parallel. Failures are reported on the console and followed by manual
     * download instructions; this method itself does not reject for them.
     *
     * @returns {Promise<void>}
     */
    async downloadModels() {
        console.log('🚀 开始下载 PP-OCRv5 模型...');
        console.log('📝 PP-OCRv5 特性:');
        console.log('   - 更高的文本检测准确率');
        console.log('   - 更好的小文本识别能力');
        console.log('   - 优化的模型结构');
        console.log('   - 完全离线运行\n');

        try {
            await this.createDirectories();

            const entries = Object.entries(this.modelUrls);
            const totalCount = entries.length;
            let successCount = 0;

            // allSettled (not all): wait for every download to finish before
            // reporting, so the failure summary is not printed while other
            // downloads are still running and logging.
            const outcomes = await Promise.allSettled(entries.map(async ([type, info]) => {
                try {
                    await this.downloadFile(type, info);
                    successCount++;
                    console.log(`   ✅ ${this.getTypeName(type)} 下载完成 (${successCount}/${totalCount})`);
                } catch (error) {
                    console.log(`   ❌ ${this.getTypeName(type)} 下载失败: ${error.message}`);
                    throw error;
                }
            }));

            const firstFailure = outcomes.find((outcome) => outcome.status === 'rejected');
            if (firstFailure) {
                throw firstFailure.reason;
            }

            console.log('\n🎉 所有模型下载完成！');
            this.displayModelInfo();

        } catch (error) {
            console.error('\n❌ 下载过程中出现错误:', error.message);
            await this.provideAlternativeSources();
        }
    }

    /**
     * Ensures the model root, one sub-directory per type, and the staging
     * directory all exist.
     *
     * @returns {Promise<void>}
     */
    async createDirectories() {
        const subdirs = Object.values(PPOCRv5Downloader.TYPE_INFO)
            .map(({ subdir }) => path.join(this.modelDir, subdir));
        const dirs = [this.modelDir, ...subdirs, this.tempDir];

        for (const dir of dirs) {
            await fs.ensureDir(dir);
        }
        console.log('📁 目录结构创建完成');
    }

    /**
     * Downloads one file unless a plausible copy already exists.
     *
     * @param {string} type - Key of `modelUrls` / `TYPE_INFO`.
     * @param {{url: string, filename: string}} info - Source URL and target file name.
     * @returns {Promise<void>}
     * @throws {Error} On a non-2xx response, an undersized body, or a failed validation.
     */
    async downloadFile(type, info) {
        const targetPath = this.getTargetPath(type, info.filename);
        const minSize = this.getMinFileSize(type);

        // Skip when a file of at least the minimum size is already in place.
        // `>=` mirrors the `<` rejection below so both checks agree on the
        // boundary value.
        if (await fs.pathExists(targetPath)) {
            const { size } = await fs.stat(targetPath);
            if (size >= minSize) {
                console.log(`   ⏭️ ${this.getTypeName(type)} 已存在，跳过下载`);
                return;
            }
        }

        console.log(`   📥 下载 ${this.getTypeName(type)}...`);

        const { default: fetch } = await import('node-fetch');
        const response = await fetch(info.url);

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }

        // `response.buffer()` is deprecated in node-fetch v3; `arrayBuffer()`
        // is available in both v2 and v3.
        const buffer = Buffer.from(await response.arrayBuffer());

        if (buffer.length < minSize) {
            throw new Error(`文件大小异常: ${(buffer.length / 1024 / 1024).toFixed(2)} MB`);
        }

        // Stage, validate, then move: the final path only ever receives a
        // file that passed validation.
        const stagingPath = path.join(this.tempDir, `${info.filename}.part`);
        await fs.ensureDir(this.tempDir);
        try {
            await fs.writeFile(stagingPath, buffer);
            await this.validateFile(type, stagingPath);
            await fs.ensureDir(path.dirname(targetPath));
            await fs.move(stagingPath, targetPath, { overwrite: true });
        } catch (error) {
            // Best-effort cleanup; the original failure is the one worth reporting.
            await fs.remove(stagingPath).catch(() => {});
            throw error;
        }
    }

    /**
     * @param {string} type - Key of `TYPE_INFO`.
     * @param {string} filename - File name inside the type's sub-directory.
     * @returns {string} Absolute path where the file is stored.
     * @throws {TypeError} If `type` is not a known model type.
     */
    getTargetPath(type, filename) {
        const meta = this.#typeInfo(type);
        if (!meta) {
            throw new TypeError(`Unknown model type: ${type}`);
        }
        return path.join(this.modelDir, meta.subdir, filename);
    }

    /**
     * @param {string} type - Key of `TYPE_INFO`.
     * @returns {string|undefined} Label used in progress messages.
     */
    getTypeName(type) {
        return this.#typeInfo(type)?.name;
    }

    /**
     * @param {string} type - Key of `TYPE_INFO`.
     * @returns {number|undefined} Minimum plausible size in bytes.
     */
    getMinFileSize(type) {
        return this.#typeInfo(type)?.minSize;
    }

    /**
     * Checks a downloaded file and prints its size. For the dictionary
     * (`keys`) the file must contain at least 5000 non-empty lines.
     *
     * @param {string} type - Key of `TYPE_INFO`.
     * @param {string} filePath - File to inspect.
     * @returns {Promise<void>}
     * @throws {Error} If the dictionary has too few lines.
     */
    async validateFile(type, filePath) {
        const stats = await fs.stat(filePath);

        if (type === 'keys') {
            const content = await fs.readFile(filePath, 'utf8');
            const lines = content.split('\n').filter(line => line.trim());
            if (lines.length < 5000) {
                throw new Error('字符集文件不完整');
            }
        }

        console.log(`      📊 文件大小: ${(stats.size / 1024 / 1024).toFixed(2)} MB`);
    }

    /**
     * Prints where each file was stored and how to start the application.
     */
    displayModelInfo() {
        console.log('\n📂 模型文件位置:');
        for (const [type, { filename }] of Object.entries(this.modelUrls)) {
            const meta = this.#typeInfo(type);
            if (!meta) {
                continue;
            }
            console.log(`   ${meta.icon} ${meta.shortName}: ${this.getTargetPath(type, filename)}`);
        }

        console.log('\n🚀 使用命令:');
        console.log('   yarn dev  # 启动应用');
    }

    /**
     * Prints manual download instructions: the source URLs and the expected
     * directory layout, both derived from `modelUrls`.
     *
     * @returns {Promise<void>}
     */
    async provideAlternativeSources() {
        const known = Object.entries(this.modelUrls)
            .map(([type, info]) => ({ meta: this.#typeInfo(type), info }))
            .filter(({ meta }) => meta);

        console.log('\n💡 备用下载方案:');
        console.log('   1. 手动下载 PP-OCRv5 模型:');
        for (const { meta, info } of known) {
            console.log(`      - ${meta.shortName}: ${info.url}`);
        }

        console.log('\n   2. 将文件放置到以下目录:');
        console.log(`      ${this.modelDir}/`);
        known.forEach(({ meta, info }, index) => {
            const branch = index === known.length - 1 ? '└──' : '├──';
            console.log(`      ${branch} ${meta.subdir}/${info.filename}`);
        });
    }
}

// Entry point: create the downloader and start it; any rejection that
// escapes downloadModels() is printed to stderr.
const downloader = new PPOCRv5Downloader();
downloader.downloadModels().catch((error) => {
    console.error(error);
});
init 2025-11-13 16:34:41 +08:00			`// scripts/download-ppocrv5.js`
			`import fs from 'fs-extra';`
			`import path from 'path';`
			`import { fileURLToPath } from 'url';`
			`import { createRequire } from 'module';`

			`const __dirname = path.dirname(fileURLToPath(import.meta.url));`
			`const require = createRequire(import.meta.url);`

			`class PPOCRv5Downloader {`
			`constructor() {`
			`this.modelDir = path.join(process.cwd(), 'models', 'ppocrv5');`
			`this.tempDir = path.join(process.cwd(), 'temp', 'downloads');`

			`// PP-OCRv5 官方模型下载链接`
			`this.modelUrls = {`
			`detection: {`
			`url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_det_infer.onnx',`
			`filename: 'ch_PP-OCRv5_det_infer.onnx'`
			`},`
			`recognition: {`
			`url: 'https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_rec_infer.onnx',`
			`filename: 'ch_PP-OCRv5_rec_infer.onnx'`
			`},`
			`classification: {`
			`url: 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.onnx',`
			`filename: 'ch_ppocr_mobile_v2.0_cls_infer.onnx'`
			`},`
			`keys: {`
			`url: 'https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/ppocr/utils/ppocr_keys_v1.txt',`
			`filename: 'ppocr_keys_v1.txt'`
			`}`
			`};`
			`}`

			`async downloadModels() {`
			`console.log('🚀 开始下载 PP-OCRv5 模型...');`
			`console.log('📝 PP-OCRv5 特性:');`
			`console.log(' - 更高的文本检测准确率');`
			`console.log(' - 更好的小文本识别能力');`
			`console.log(' - 优化的模型结构');`
			`console.log(' - 完全离线运行\n');`

			`try {`
			`// 创建目录结构`
			`await this.createDirectories();`

			`let successCount = 0;`
			`const totalCount = Object.keys(this.modelUrls).length;`

			`// 并行下载所有模型`
			`const downloadPromises = Object.entries(this.modelUrls).map(async ([type, info]) => {`
			`try {`
			`await this.downloadFile(type, info);`
			`successCount++;`
			console.log(` ✅ ${this.getTypeName(type)} 下载完成 (${successCount}/${totalCount})`);
			`} catch (error) {`
			console.log(` ❌ ${this.getTypeName(type)} 下载失败: ${error.message}`);
			`throw error;`
			`}`
			`});`

			`await Promise.all(downloadPromises);`

			`console.log('\n🎉 所有模型下载完成！');`
			`this.displayModelInfo();`

			`} catch (error) {`
			`console.error('\n❌ 下载过程中出现错误:', error.message);`
			`await this.provideAlternativeSources();`
			`}`
			`}`

			`async createDirectories() {`
			`const dirs = [`
			`this.modelDir,`
			`path.join(this.modelDir, 'det'),`
			`path.join(this.modelDir, 'rec'),`
			`path.join(this.modelDir, 'cls'),`
			`path.join(this.modelDir, 'keys'),`
			`this.tempDir`
			`];`

			`for (const dir of dirs) {`
			`await fs.ensureDir(dir);`
			`}`
			`console.log('📁 目录结构创建完成');`
			`}`

			`async downloadFile(type, info) {`
			`const targetPath = this.getTargetPath(type, info.filename);`

			`// 检查文件是否已存在`
			`if (await fs.pathExists(targetPath)) {`
			`const stats = await fs.stat(targetPath);`
			`if (stats.size > this.getMinFileSize(type)) {`
			console.log(` ⏭️ ${this.getTypeName(type)} 已存在，跳过下载`);
			`return;`
			`}`
			`}`

			console.log(` 📥 下载 ${this.getTypeName(type)}...`);

			`const fetch = await import('node-fetch');`
			`const response = await fetch.default(info.url);`

			`if (!response.ok) {`
			throw new Error(`HTTP ${response.status}: ${response.statusText}`);
			`}`

			`const buffer = await response.buffer();`

			`// 验证文件大小`
			`if (buffer.length < this.getMinFileSize(type)) {`
			throw new Error(`文件大小异常: ${(buffer.length / 1024 / 1024).toFixed(2)} MB`);
			`}`

			`await fs.writeFile(targetPath, buffer);`

			`// 验证文件完整性`
			`await this.validateFile(type, targetPath);`
			`}`

			`getTargetPath(type, filename) {`
			`const dirs = {`
			`detection: path.join(this.modelDir, 'det'),`
			`recognition: path.join(this.modelDir, 'rec'),`
			`classification: path.join(this.modelDir, 'cls'),`
			`keys: path.join(this.modelDir, 'keys')`
			`};`
			`return path.join(dirs[type], filename);`
			`}`

			`getTypeName(type) {`
			`const names = {`
			`detection: '检测模型 (PP-OCRv5 Det)',`
			`recognition: '识别模型 (PP-OCRv5 Rec)',`
			`classification: '分类模型 (Cls)',`
			`keys: '字符集文件'`
			`};`
			`return names[type];`
			`}`

			`getMinFileSize(type) {`
			`const sizes = {`
			`detection: 2000000, // 2MB`
			`recognition: 8000000, // 8MB`
			`classification: 1000000, // 1MB`
			`keys: 50000 // 50KB`
			`};`
			`return sizes[type];`
			`}`

			`async validateFile(type, filePath) {`
			`const stats = await fs.stat(filePath);`

			`if (type === 'keys') {`
			`const content = await fs.readFile(filePath, 'utf8');`
			`const lines = content.split('\n').filter(line => line.trim());`
			`if (lines.length < 5000) {`
			`throw new Error('字符集文件不完整');`
			`}`
			`}`

			console.log(` 📊 文件大小: ${(stats.size / 1024 / 1024).toFixed(2)} MB`);
			`}`

			`displayModelInfo() {`
			`console.log('\n📂 模型文件位置:');`
			console.log(` 🎯 检测模型: ${path.join(this.modelDir, 'det', 'ch_PP-OCRv5_det_infer.onnx')}`);
			console.log(` 🔤 识别模型: ${path.join(this.modelDir, 'rec', 'ch_PP-OCRv5_rec_infer.onnx')}`);
			console.log(` 🧭 分类模型: ${path.join(this.modelDir, 'cls', 'ch_ppocr_mobile_v2.0_cls_infer.onnx')}`);
			console.log(` 📝 字符集: ${path.join(this.modelDir, 'keys', 'ppocr_keys_v1.txt')}`);

			`console.log('\n🚀 使用命令:');`
			`console.log(' yarn dev # 启动应用');`
			`}`

			`async provideAlternativeSources() {`
			`console.log('\n💡 备用下载方案:');`
			`console.log(' 1. 手动下载 PP-OCRv5 模型:');`
			`console.log(' - 检测模型: https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_det_infer.onnx');`
			`console.log(' - 识别模型: https://paddleocr.bj.bcebos.com/PP-OCRv5/chinese/ch_PP-OCRv5_rec_infer.onnx');`
			`console.log(' - 分类模型: https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.onnx');`
			`console.log(' - 字符集: https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/ppocr/utils/ppocr_keys_v1.txt');`
			`console.log('\n 2. 将文件放置到以下目录:');`
			console.log(` ${this.modelDir}/`);
			`console.log(' ├── det/ch_PP-OCRv5_det_infer.onnx');`
			`console.log(' ├── rec/ch_PP-OCRv5_rec_infer.onnx');`
			`console.log(' ├── cls/ch_ppocr_mobile_v2.0_cls_infer.onnx');`
			`console.log(' └── keys/ppocr_keys_v1.txt');`
			`}`
			`}`

			`// 执行下载`
			`const downloader = new PPOCRv5Downloader();`
			`downloader.downloadModels().catch(console.error);`