| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- // server/utils/textDirectionClassifier.js
- import { Tensor } from 'onnxruntime-node';
- import sharp from 'sharp';
/**
 * Decides whether a cropped text region is upright (0°) or rotated by 180°
 * by running it through an ONNX inference session.
 *
 * The session and configuration are injected through `initialize()`; the
 * class never loads a model itself.
 */
class TextDirectionClassifier {
  constructor() {
    this.clsSession = null;
    this.config = null;
  }

  /**
   * Stores the inference session and configuration used by later calls.
   *
   * @param {object} clsSession - Session exposing `run()`, `inputNames` and `outputNames`.
   * @param {object} config - Stored as-is; not read anywhere in this class.
   */
  initialize(clsSession, config) {
    this.clsSession = clsSession;
    this.config = config;
  }

  /**
   * Classifies the direction of a single text region.
   *
   * Best-effort by design: any failure (missing session, undecodable image,
   * inference error) is logged and reported as "upright with full confidence"
   * so that callers can continue without special-casing errors.
   *
   * @param {Buffer} textRegionBuffer - Encoded image of the text region.
   * @returns {Promise<{clsResult: number, clsConfidence: number}>}
   */
  async classifyTextDirection(textRegionBuffer) {
    try {
      const inputTensor = await this.prepareClsInput(textRegionBuffer);
      const feeds = { [this.clsSession.inputNames[0]]: inputTensor };
      const outputs = await this.clsSession.run(feeds);
      return this.postprocessCls(outputs);
    } catch (error) {
      console.error('文本方向分类失败:', error);
      return { clsResult: 0, clsConfidence: 1.0 };
    }
  }

  /**
   * Resizes the region to 192x48 and converts it into a `[1, 3, 48, 192]`
   * float32 tensor with values scaled to `[0, 1]`.
   *
   * NOTE(review): only a plain /255 scaling is applied and the channel order
   * is whatever the decoder yields; confirm against the model's expected
   * mean/std normalisation and channel order.
   *
   * @param {Buffer} textRegionBuffer - Encoded image of the text region.
   * @returns {Promise<Tensor>} Planar (channel-first) input tensor.
   */
  async prepareClsInput(textRegionBuffer) {
    const targetHeight = 48;
    const targetWidth = 192;

    const resizedBuffer = await sharp(textRegionBuffer)
      .resize(targetWidth, targetHeight)
      .png()
      .toBuffer();

    const imageData = await sharp(resizedBuffer)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const inputData = TextDirectionClassifier.interleavedToPlanar(
      imageData.data,
      imageData.info.channels,
      targetHeight,
      targetWidth,
    );

    return new Tensor('float32', inputData, [1, 3, targetHeight, targetWidth]);
  }

  /**
   * Repacks interleaved 8-bit pixel data (`[c0, c1, c2, ..., c0, c1, c2, ...]`)
   * into three consecutive float planes scaled to `[0, 1]`.
   *
   * The previous inline loop derived the channel from the pixel index, which
   * is always 0, so only the first plane was ever filled and the other two
   * stayed zero. Here every pixel contributes to all three planes.
   *
   * @param {ArrayLike<number>} data - Interleaved samples, `channels` per pixel.
   * @param {number} channels - Samples per pixel in `data`.
   * @param {number} height - Image height in pixels.
   * @param {number} width - Image width in pixels.
   * @returns {Float32Array} Array of length `3 * height * width`.
   */
  static interleavedToPlanar(data, channels, height, width) {
    const planeSize = height * width;
    const planar = new Float32Array(3 * planeSize);
    if (!Number.isInteger(channels) || channels < 1) {
      return planar;
    }

    // Never read past the end of `data`; pixels that are missing stay 0.
    const pixelCount = Math.min(planeSize, Math.floor(data.length / channels));

    for (let pixel = 0; pixel < pixelCount; pixel += 1) {
      const base = pixel * channels;
      for (let plane = 0; plane < 3; plane += 1) {
        // With fewer than three components per pixel, reuse the first one
        // for every plane instead of reading alpha or out-of-pixel samples.
        const component = channels >= 3 ? plane : 0;
        planar[plane * planeSize + pixel] = data[base + component] / 255.0;
      }
    }

    return planar;
  }

  /**
   * Turns the raw session output into a direction and a score.
   *
   * The first output is read as `[score0, score180, ...]`; 180° is chosen only
   * when its score is strictly greater than the 0° score. A missing or empty
   * output yields the default `{ clsResult: 0, clsConfidence: 1.0 }`.
   *
   * @param {object} outputs - Map from output name to tensor-like `{ data }`.
   * @returns {{clsResult: number, clsConfidence: number}}
   */
  postprocessCls(outputs) {
    const fallback = { clsResult: 0, clsConfidence: 1.0 };

    const clsOutput = outputs[this.clsSession.outputNames[0]];
    if (!clsOutput) {
      return fallback;
    }

    const data = clsOutput.data;
    if (!data || data.length === 0) {
      // Without this guard `data[0].toFixed` below would throw.
      return fallback;
    }

    let clsResult = 0;
    let clsConfidence = data[0];
    if (data.length >= 2 && data[1] > data[0]) {
      clsResult = 180;
      clsConfidence = data[1];
    }

    console.log(`🧭 文本方向分类: ${clsResult}°, 置信度: ${clsConfidence.toFixed(4)}`);
    return { clsResult, clsConfidence };
  }
}
- export default TextDirectionClassifier;
|