vor 2 Monaten · f8b26a40a5
--- a/nodejs/ef-compiler/actions/echo-parser.js
+++ b/nodejs/ef-compiler/actions/echo-parser.js
@@ -1,6 +1,28 @@
 
				-/** 语句：echo 打印信息（写入 log + UI） */
			
 
				+/** 语句：echo 打印信息（写入 log + UI）；统一结点报错打印逻辑 */
			
 
				 const types = ['echo']
			
 
				 
			
 
				/**
				 * Unified reporter for failed actions: builds one timestamped error line, writes it through
				 * `ctx.logMessage`, and, when a global `window` exists, also dispatches a `log-message`
				 * CustomEvent so the UI can display it.
				 *
				 * Reporting is strictly best-effort: a missing/invalid context, a logger that throws or rejects,
				 * or a failing `getActionName` never makes this function reject.
				 *
				 * @param {object} action - The action that was being executed.
				 * @param {{ success: boolean, error?: string }} result - Execution result; nothing is logged when `success` is truthy.
				 * @param {{ getActionName?: function, logMessage?: function, folderPath?: string }} ctx - Logging context.
				 * @returns {Promise<void>}
				 */
				async function logActionError(action, result, ctx) {
				  if (result && result.success) return

				  // Tolerate a missing context instead of throwing on destructuring.
				  const { getActionName, logMessage, folderPath } = ctx || {}

				  const pad = (n) => String(n).padStart(2, '0')
				  const now = new Date()
				  const timeStr = `${now.getFullYear()}/${pad(now.getMonth() + 1)}/${pad(now.getDate())} ${pad(now.getHours())}:${pad(now.getMinutes())}:${pad(now.getSeconds())}`

				  // Blank or missing error text is reported as "unknown".
				  const hasDetail = result && result.error != null && String(result.error).trim() !== ''
				  const errDetail = hasDetail ? String(result.error) : 'unknown'

				  const fallbackName = (action && action.type) || 'unknown'
				  let actionName = fallbackName
				  if (typeof getActionName === 'function') {
				    try {
				      actionName = getActionName(action)
				    } catch (_) {
				      // A broken name resolver must not cost us the error line itself.
				      actionName = fallbackName
				    }
				  }

				  const errorMsg = `[sequence-runner] [ERROR] ${actionName} failed: ${errDetail} [time: ${timeStr}]`

				  if (typeof logMessage === 'function') {
				    try {
				      // `await` handles both promise-returning and synchronous loggers.
				      await logMessage(errorMsg, folderPath)
				    } catch (_) {
				      // Best effort: a failing log sink must not mask the original action error.
				    }
				  }

				  if (typeof window !== 'undefined') {
				    try {
				      window.dispatchEvent(new CustomEvent('log-message', { detail: { message: errorMsg, isError: true } }))
				    } catch (_) {
				      // UI notification is optional.
				    }
				  }
				}
			
 
				+
			
 
				 function parse(action, parseContext) {
			
 
				   const { extractVarName } = parseContext
			
 
				   const parsed = { type: 'echo' }
			
@@ -41,4 +63,4 @@ async function execute(action, ctx) {
 
				   return { success: true }
			
 
				 }
			
 
				 
			
 
				-module.exports = { types, parse, execute }
			
 
				+module.exports = { types, parse, execute, logActionError }
			
--- a/nodejs/ef-compiler/actions/fun/fun-parser.js
+++ b/nodejs/ef-compiler/actions/fun/fun-parser.js
@@ -10,6 +10,7 @@ const LEGACY_FUN_TYPES = [
 
				   'fun', 'ai',
			
 
				   'read-txt', 'read-text', 'save-txt', 'save-text',
			
 
				   'img-bounding-box-location', 'img-center-point-location', 'img-cropping',
			
 
				+  'ocr',
			
 
				   'read-last-message', 'smart-chat-append',
			
 
				   'extract-messages', 'ocr-chat', 'ocr-chat-history', 'extract-chat-history',
			
 
				   'save-messages', 'generate-summary', 'generate-history-summary',
			
@@ -151,6 +152,11 @@ function parse(action, parseContext) {
 
				       parsed.savePath = action.inVars?.[1] ?? action.savePath
			
 
				       if (action.outVars && action.outVars.length > 0) parsed.variable = extractVarName(action.outVars[0])
			
 
				       break
			
 
				+    case 'ocr':
			
 
				+      parsed.inVars = action.inVars && Array.isArray(action.inVars) ? action.inVars.map(v => extractVarName(v)) : []
			
 
				+      parsed.image = action.inVars && Array.isArray(action.inVars) && action.inVars.length > 0 ? action.inVars[0] : action.image
			
 
				+      parsed.variable = action.outVars && Array.isArray(action.outVars) && action.outVars.length > 0 ? extractVarName(action.outVars[0]) : (action.variable ? extractVarName(action.variable) : undefined)
			
 
				+      break
			
 
				     default:
			
 
				       parsed.inVars = action.inVars && Array.isArray(action.inVars) ? action.inVars.map(v => extractVarName(v)) : []
			
 
				       parsed.outVars = action.outVars && Array.isArray(action.outVars) ? action.outVars.map(v => extractVarName(v)) : []
			
@@ -221,6 +227,8 @@ function get(funcDir, category) {
 
				         executeImgBoundingBoxLocation: require(path.join(funcDir, 'img-bounding-box-location.js')).executeImgBoundingBoxLocation,
			
 
				         executeImgCenterPointLocation: require(path.join(funcDir, 'img-center-point-location.js')).executeImgCenterPointLocation,
			
 
				         executeImgCropping: require(path.join(funcDir, 'img-cropping.js')).executeImgCropping,
			
 
				+        executeOcr: require(path.join(funcDir, 'ocr.js')).executeOcr,
			
 
				+        executeOcrFindText: require(path.join(funcDir, 'ocr.js')).executeOcrFindText,
			
 
				       }
			
 
				       break
			
 
				     case 'io':
			
@@ -348,6 +356,37 @@ async function run(actionType, action, ctx, device, folderPath) {
 
				       return { success: true }
			
 
				     }
			
 
				 
			
 
				+    case 'ocr': {
			
 
				+      const { executeOcr, executeOcrFindText } = get(funcDir, 'img')
			
 
				+      let imageOrText = action.inVars && Array.isArray(action.inVars) && action.inVars.length > 0 ? action.inVars[0] : action.image
			
 
				+      if (imageOrText == null || String(imageOrText).trim() === '') return { success: false, error: 'ocr 缺少参数：图片路径或要查找的文字（inVars[0] / image）' }
			
 
				+      const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : (ctx.compilerConfig && ctx.compilerConfig.projectRoot) || process.cwd()
			
 
				+      const fs = require('fs')
			
 
				+      const isAbsoluteOrDrive = String(imageOrText).startsWith('/') || String(imageOrText).includes(':')
			
 
				+      const hasSubPath = String(imageOrText).includes('/') || String(imageOrText).includes(path.sep)
			
 
				+      const resolvedPath = isAbsoluteOrDrive ? imageOrText : (hasSubPath ? path.join(baseDir, imageOrText) : path.join(baseDir, 'resources', imageOrText))
			
 
				+      const isImagePath = fs.existsSync(resolvedPath) && fs.statSync(resolvedPath).isFile()
			
 
				+      const outputVarName = action.outVars && Array.isArray(action.outVars) && action.outVars.length > 0 ? extractVarName(String(action.outVars[0]).trim()) : (action.variable ? extractVarName(action.variable) : null)
			
 
				+      if (isImagePath) {
			
 
				+        const result = await executeOcr({ imagePath: imageOrText, folderPath })
			
 
				+        if (!result.success) return { success: false, error: result.error }
			
 
				+        if (outputVarName) {
			
 
				+          variableContext[outputVarName] = result.text != null ? String(result.text) : ''
			
 
				+          await logOutVars(action, variableContext, folderPath)
			
 
				+        }
			
 
				+        return { success: true, result: result.text }
			
 
				+      }
			
 
				+      if (!device) return { success: false, error: 'ocr 按文字查找需设备截图，当前无设备' }
			
 
				+      const findResult = await executeOcrFindText({ device, findText: String(imageOrText).trim(), folderPath })
			
 
				+      if (!findResult.success) return { success: false, error: findResult.error }
			
 
				+      if (outputVarName) {
			
 
				+        variableContext[outputVarName] = findResult.center && typeof findResult.center === 'object'
			
 
				+          ? JSON.stringify({ x: findResult.center.x, y: findResult.center.y }) : ''
			
 
				+        await logOutVars(action, variableContext, folderPath)
			
 
				+      }
			
 
				+      return { success: true, result: findResult.center }
			
 
				+    }
			
 
				+
			
 
				     case 'read-last-message': {
			
 
				       const { executeReadLastMessage } = get(funcDir, 'io')
			
 
				       const inputVars = action.inVars || action.inputVars || []
			
@@ -660,7 +699,7 @@ async function run(actionType, action, ctx, device, folderPath) {
 
				 
			
 
				 const FUN_TYPES = new Set([
			
 
				   'ai',
			
 
				-  'img-bounding-box-location', 'img-center-point-location', 'img-cropping',
			
 
				+  'img-bounding-box-location', 'img-center-point-location', 'img-cropping', 'ocr',
			
 
				   'read-last-message', 'read-txt', 'read-text', 'smart-chat-append', 'save-txt', 'save-text',
			
 
				   'extract-messages', 'ocr-chat', 'ocr-chat-history', 'extract-chat-history',
			
 
				   'save-messages', 'generate-summary', 'generate-history-summary',
			
--- a/nodejs/ef-compiler/actions/fun/ocr.js
+++ b/nodejs/ef-compiler/actions/fun/ocr.js
@@ -0,0 +1,127 @@
 
				/**
				 * `fun` tag: ocr (OnnxOCR recognition).
				 * 1) When the input is an image path: run OCR on that image and return the recognised text.
				 * 2) When the input is text to look for: take a device screenshot, run OCR on it, locate the
				 *    text and return the centre point of its region (requires a device).
				 */

				const path = require('path')
				const fs = require('fs')
				const os = require('os')
				const { spawnSync } = require('child_process')
				const { captureScreenshot } = require('../../../adb/adb-screencap.js')

				// config.js is looked up in a `configs` folder next to STATIC_ROOT when that variable is set,
				// otherwise four directories above this file. The project root is the parent of `configs`.
				const configPath = process.env.STATIC_ROOT
				  ? path.join(path.dirname(process.env.STATIC_ROOT), 'configs', 'config.js')
				  : path.join(__dirname, '..', '..', '..', '..', 'configs', 'config.js')
				const projectRoot = path.dirname(path.dirname(path.resolve(configPath)))
				const config = fs.existsSync(configPath) ? require(configPath) : {}
				const ocrScriptPath = path.join(projectRoot, 'python', 'scripts', 'ocr-onnx.py')

				// Upper bound for one OCR invocation of the Python script.
				const OCR_TIMEOUT_MS = 60000

				const tagName = 'ocr'

				const schema = {
				  description: 'OCR：传入图片路径则识别全文；传入要查找的文字则在设备截图中定位该文字并返回中心点坐标。',
				  inputs: { image: '图片路径 或 要查找的文字', variable: '输出变量名（保存识别文本或中心点 {"x", "y"}）' },
				  outputs: { variable: '识别文本 或 中心点 JSON' },
				}

				/**
				 * Locate the Python interpreter used to run the OCR script.
				 * Probes the configured (or default per-architecture) Python folder for the known Windows
				 * layouts first, then for POSIX virtualenv layouts, and finally falls back to `python` on PATH.
				 * @returns {string} Interpreter path, or the bare command name `python`.
				 */
				function getPythonPath() {
				  const base = config.pythonPath?.path || config.pythonVenvPath || path.join(projectRoot, 'python', process.arch === 'arm64' ? 'arm64' : 'x64')
				  const candidates = [
				    path.join(base, 'env', 'Scripts', 'python.exe'),
				    path.join(base, 'Scripts', 'python.exe'),
				    path.join(base, 'py', 'python.exe'),
				  ]
				  if (process.platform !== 'win32') {
				    // Virtualenvs on Linux/macOS keep the interpreter under bin/ instead of Scripts/.
				    candidates.push(path.join(base, 'env', 'bin', 'python'), path.join(base, 'bin', 'python'))
				  }
				  return candidates.find((candidate) => fs.existsSync(candidate)) || 'python'
				}

				/**
				 * Resolve an image reference against a base directory.
				 * Values starting with `/` or containing `:` are used as-is; values containing a path
				 * separator are joined to `baseDir`; bare file names are looked up in `<baseDir>/resources`.
				 * NOTE(review): the `ocr` case in fun-parser.js applies the same heuristics before calling
				 * executeOcr, so the two should be kept in sync.
				 * @param {string} imagePath
				 * @param {string} baseDir
				 * @returns {string}
				 */
				function resolveImagePath(imagePath, baseDir) {
				  const isAbsoluteOrDrive = imagePath.startsWith('/') || imagePath.includes(':')
				  if (isAbsoluteOrDrive) return imagePath
				  const hasSubPath = imagePath.includes('/') || imagePath.includes(path.sep)
				  return hasSubPath ? path.join(baseDir, imagePath) : path.join(baseDir, 'resources', imagePath)
				}

				/**
				 * Run the OCR Python script synchronously and parse its JSON output.
				 * @param {string[]} scriptArgs - Arguments placed between the script path and `--project-root`.
				 * @param {string} fallbackError - Message used when the script fails without any output.
				 * @returns {{ success: true, out: object } | { success: false, error: string }}
				 */
				function runOcrScript(scriptArgs, fallbackError) {
				  const pythonPath = getPythonPath()
				  const r = spawnSync(pythonPath, [ocrScriptPath, ...scriptArgs, '--project-root', projectRoot], {
				    encoding: 'utf-8',
				    timeout: OCR_TIMEOUT_MS,
				    env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
				    cwd: projectRoot,
				  })

				  // spawnSync reports spawn failures (e.g. interpreter not found) and timeouts through
				  // `error`; in those cases `status` is null and stdout/stderr carry nothing useful.
				  if (r.error) {
				    if (r.error.code === 'ETIMEDOUT') {
				      return { success: false, error: `OCR 执行超时（${OCR_TIMEOUT_MS / 1000} 秒）` }
				    }
				    return { success: false, error: `无法执行 Python (${pythonPath}): ${r.error.message}` }
				  }

				  const outStr = (r.stdout || '').trim()
				  const errStr = (r.stderr || '').trim()
				  if (r.status !== 0) {
				    return { success: false, error: errStr || outStr || fallbackError }
				  }

				  let out
				  try {
				    out = JSON.parse(outStr)
				  } catch (e) {
				    return { success: false, error: `OCR 输出解析失败: ${outStr}` }
				  }
				  // Valid JSON that is not an object (null, number, string) cannot carry a result.
				  if (out == null || typeof out !== 'object') {
				    return { success: false, error: `OCR 输出解析失败: ${outStr}` }
				  }
				  return { success: true, out }
				}

				/**
				 * Run OnnxOCR on an image file.
				 * @param {{ imagePath: string, folderPath?: string }} input - `imagePath` is a relative or
				 *   absolute image path; `folderPath` is the flow directory used to resolve relative paths
				 *   (the project root is used when it is absent).
				 * @returns {Promise<{ success: boolean, text?: string, error?: string }>}
				 */
				async function executeOcr({ imagePath, folderPath } = {}) {
				  if (!imagePath || typeof imagePath !== 'string') {
				    return { success: false, error: '缺少图片路径' }
				  }
				  const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot
				  const resolvedImage = resolveImagePath(imagePath, baseDir)

				  if (!fs.existsSync(ocrScriptPath)) {
				    return { success: false, error: `OCR 脚本不存在: ${ocrScriptPath}` }
				  }
				  if (!fs.existsSync(resolvedImage)) {
				    return { success: false, error: `图片不存在: ${resolvedImage}` }
				  }

				  const run = runOcrScript(['--image', resolvedImage], 'OCR 执行失败')
				  if (!run.success) return { success: false, error: run.error }

				  const { out } = run
				  if (!out.success) {
				    return { success: false, error: out.error || 'OCR 识别失败' }
				  }
				  return { success: true, text: out.text != null ? String(out.text) : '' }
				}

				/**
				 * Find a piece of text in a fresh device screenshot and return the centre of its region.
				 * The screenshot is written to a temporary file that is always removed afterwards.
				 * @param {{ device: string, findText: string, folderPath?: string }} input - `folderPath` is
				 *   accepted for signature parity with executeOcr but is not used here.
				 * @returns {Promise<{ success: boolean, center?: { x: number, y: number }, error?: string }>}
				 */
				async function executeOcrFindText({ device, findText, folderPath } = {}) {
				  if (!device) return { success: false, error: '缺少设备 ID，无法截图' }
				  if (!findText || typeof findText !== 'string' || findText.trim() === '') {
				    return { success: false, error: '缺少要查找的文字' }
				  }
				  // Check the script before touching the device so a broken install fails fast.
				  if (!fs.existsSync(ocrScriptPath)) {
				    return { success: false, error: `OCR 脚本不存在: ${ocrScriptPath}` }
				  }

				  // pid + timestamp + random suffix keeps concurrent runs from sharing one temp file.
				  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`
				  const screenshotPath = path.join(os.tmpdir(), `ef-ocr-screenshot-${uniqueSuffix}.png`)
				  try {
				    captureScreenshot(device, screenshotPath)
				    if (!fs.existsSync(screenshotPath) || fs.statSync(screenshotPath).size === 0) {
				      return { success: false, error: '设备截图失败或为空' }
				    }

				    const run = runOcrScript(['--image', screenshotPath, '--find-text', findText.trim()], 'OCR 查找文字失败')
				    if (!run.success) return { success: false, error: run.error }

				    const { out } = run
				    if (!out.success || out.x == null || out.y == null) {
				      return { success: false, error: out.error || '图中未找到该文字' }
				    }
				    return { success: true, center: { x: out.x, y: out.y } }
				  } finally {
				    // Best-effort cleanup; the file may not exist if the capture itself failed.
				    try { fs.unlinkSync(screenshotPath) } catch (_) {}
				  }
				}

				module.exports = { tagName, schema, executeOcr, executeOcrFindText }
			
--- a/nodejs/ef-compiler/sequence-runner.js
+++ b/nodejs/ef-compiler/sequence-runner.js
@@ -1,3 +1,5 @@
 
				+const { logActionError } = require('./actions/echo-parser.js')
			
 
				+
			
 
				 /**
			
 
				  * 执行操作序列（schedule/if/for/while + 普通步骤）
			
 
				  * 单文件 ≤500 行。ctx: executeAction, logMessage, evaluateCondition, getActionName, parseDelayString, calculateWaitTime, state
			
@@ -185,13 +187,10 @@ async function executeActionSequence(
 
				 
			
 
				       if (result.success && result.skipped) { /* 步骤跳过不写 log */ }
			
 
				 
			
 
				-      // 统一在此处将结点报错写入 log.txt，各结点只需 return { success: false, error } 即可，无需单独写 logMessage
			
 
				+      // 统一由 echo-parser.logActionError 打印结点报错，各结点只需 return { success: false, error } 即可
			
 
				       if (!result.success) {
			
 
				-        const now = new Date()
			
 
				-        const timeStr = `${now.getFullYear()}/${String(now.getMonth() + 1).padStart(2, '0')}/${String(now.getDate()).padStart(2, '0')} ${String(now.getHours()).padStart(2, '0')}:${String(now.getMinutes()).padStart(2, '0')}:${String(now.getSeconds()).padStart(2, '0')}`
			
 
				+        await logActionError(action, result, { getActionName, logMessage, folderPath }).catch(() => {})
			
 
				         const errDetail = result.error != null && result.error !== '' ? String(result.error) : 'unknown'
			
 
				-        const errorMsg = `[sequence-runner] [ERROR] ${getActionName(action)} failed: ${errDetail} [time: ${timeStr}]`
			
 
				-        await logMessage(errorMsg, folderPath).catch(() => {})
			
 
				         return { success: false, error: errDetail, completedSteps: i }
			
 
				       }
			
 
				 
			
--- a/python/download-onnxocr.bat
+++ b/python/download-onnxocr.bat
@@ -0,0 +1,6 @@
 
				@echo off
				REM Launcher for download-onnxocr.ps1: runs it from this script's folder and keeps the
				REM console window open so the result can be read.
				chcp 65001 >nul
				cd /d "%~dp0"
				echo Downloading OnnxOCR from GitHub...
				powershell -ExecutionPolicy Bypass -File "%~dp0download-onnxocr.ps1"
				REM Remember the PowerShell exit code; "pause" would otherwise overwrite it.
				set "EXIT_CODE=%ERRORLEVEL%"
				pause
				exit /b %EXIT_CODE%
			
--- a/python/download-onnxocr.ps1
+++ b/python/download-onnxocr.ps1
@@ -0,0 +1,33 @@
 
				# Download OnnxOCR from GitHub into the folder that contains this script.
				# Usage: run .\download-onnxocr.ps1 from PowerShell.

				$ErrorActionPreference = "Stop"
				$pythonDir = $PSScriptRoot
				$zipPath = Join-Path $pythonDir "onnxocr-main.zip"
				$extractPath = Join-Path $pythonDir "onnxocr-temp"
				$targetPath = Join-Path $pythonDir "OnnxOCR"

				# Windows PowerShell 5.1 can default to TLS 1.0/1.1, which GitHub rejects; enable TLS 1.2.
				[Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12

				Write-Host "Downloading OnnxOCR from GitHub..."
				try {
				    Invoke-WebRequest -Uri "https://github.com/jingsongliujing/onnxocr/archive/refs/heads/main.zip" `
				        -OutFile $zipPath -UseBasicParsing
				} catch {
				    # Do not leave a partial archive behind.
				    Remove-Item -Force $zipPath -ErrorAction SilentlyContinue
				    Write-Host "Download failed. Try git clone:"
				    Write-Host "  cd $pythonDir"
				    Write-Host "  git clone --depth 1 https://github.com/jingsongliujing/onnxocr.git OnnxOCR"
				    exit 1
				}

				try {
				    Write-Host "Extracting..."
				    if (Test-Path $extractPath) { Remove-Item -Recurse -Force $extractPath }
				    Expand-Archive -Path $zipPath -DestinationPath $extractPath -Force

				    # The GitHub archive is expected to contain a single top-level folder.
				    $innerFolder = Get-ChildItem $extractPath -Directory | Select-Object -First 1
				    if (-not $innerFolder) {
				        throw "The downloaded archive did not contain a folder; OnnxOCR was not installed."
				    }

				    # Replace any previous copy only once the new one is known to be available.
				    if (Test-Path $targetPath) { Remove-Item -Recurse -Force $targetPath }
				    Move-Item -Path $innerFolder.FullName -Destination $targetPath
				} finally {
				    # Temporary files are removed whether or not extraction succeeded.
				    Remove-Item -Recurse -Force $extractPath -ErrorAction SilentlyContinue
				    Remove-Item -Force $zipPath -ErrorAction SilentlyContinue
				}

				Write-Host "Done. OnnxOCR is at: $targetPath"
			
--- a/python/onnxocr/.gitignore
+++ b/python/onnxocr/.gitignore
@@ -0,0 +1,160 @@
 
				+# Byte-compiled / optimized / DLL files
			
 
				+__pycache__/
			
 
				+*.py[cod]
			
 
				+*$py.class
			
 
				+
			
 
				+# C extensions
			
 
				+*.so
			
 
				+
			
 
				+# Distribution / packaging
			
 
				+.Python
			
 
				+build/
			
 
				+develop-eggs/
			
 
				+dist/
			
 
				+downloads/
			
 
				+eggs/
			
 
				+.eggs/
			
 
				+lib/
			
 
				+lib64/
			
 
				+parts/
			
 
				+sdist/
			
 
				+var/
			
 
				+wheels/
			
 
				+share/python-wheels/
			
 
				+*.egg-info/
			
 
				+.installed.cfg
			
 
				+*.egg
			
 
				+MANIFEST
			
 
				+
			
 
				+# PyInstaller
			
 
				+#  Usually these files are written by a python script from a template
			
 
				+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
			
 
				+*.manifest
			
 
				+*.spec
			
 
				+
			
 
				+# Installer logs
			
 
				+pip-log.txt
			
 
				+pip-delete-this-directory.txt
			
 
				+
			
 
				+# Unit test / coverage reports
			
 
				+htmlcov/
			
 
				+.tox/
			
 
				+.nox/
			
 
				+.coverage
			
 
				+.coverage.*
			
 
				+.cache
			
 
				+nosetests.xml
			
 
				+coverage.xml
			
 
				+*.cover
			
 
				+*.py,cover
			
 
				+.hypothesis/
			
 
				+.pytest_cache/
			
 
				+cover/
			
 
				+
			
 
				+# Translations
			
 
				+*.mo
			
 
				+*.pot
			
 
				+
			
 
				+# Django stuff:
			
 
				+*.log
			
 
				+local_settings.py
			
 
				+db.sqlite3
			
 
				+db.sqlite3-journal
			
 
				+
			
 
				+# Flask stuff:
			
 
				+instance/
			
 
				+.webassets-cache
			
 
				+
			
 
				+# Scrapy stuff:
			
 
				+.scrapy
			
 
				+
			
 
				+# Sphinx documentation
			
 
				+docs/_build/
			
 
				+
			
 
				+# PyBuilder
			
 
				+.pybuilder/
			
 
				+target/
			
 
				+
			
 
				+# Jupyter Notebook
			
 
				+.ipynb_checkpoints
			
 
				+
			
 
				+# IPython
			
 
				+profile_default/
			
 
				+ipython_config.py
			
 
				+
			
 
				+# pyenv
			
 
				+#   For a library or package, you might want to ignore these files since the code is
			
 
				+#   intended to run in multiple environments; otherwise, check them in:
			
 
				+# .python-version
			
 
				+
			
 
				+# pipenv
			
 
				+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
			
 
				+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
			
 
				+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
			
 
				+#   install all needed dependencies.
			
 
				+#Pipfile.lock
			
 
				+
			
 
				+# poetry
			
 
				+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
			
 
				+#   This is especially recommended for binary packages to ensure reproducibility, and is more
			
 
				+#   commonly ignored for libraries.
			
 
				+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
			
 
				+#poetry.lock
			
 
				+
			
 
				+# pdm
			
 
				+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
			
 
				+#pdm.lock
			
 
				+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
			
 
				+#   in version control.
			
 
				+#   https://pdm.fming.dev/#use-with-ide
			
 
				+.pdm.toml
			
 
				+
			
 
				+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
			
 
				+__pypackages__/
			
 
				+
			
 
				+# Celery stuff
			
 
				+celerybeat-schedule
			
 
				+celerybeat.pid
			
 
				+
			
 
				+# SageMath parsed files
			
 
				+*.sage.py
			
 
				+
			
 
				+# Environments
			
 
				+.env
			
 
				+.venv
			
 
				+env/
			
 
				+venv/
			
 
				+ENV/
			
 
				+env.bak/
			
 
				+venv.bak/
			
 
				+
			
 
				+# Spyder project settings
			
 
				+.spyderproject
			
 
				+.spyproject
			
 
				+
			
 
				+# Rope project settings
			
 
				+.ropeproject
			
 
				+
			
 
				+# mkdocs documentation
			
 
				+/site
			
 
				+
			
 
				+# mypy
			
 
				+.mypy_cache/
			
 
				+.dmypy.json
			
 
				+dmypy.json
			
 
				+
			
 
				+# Pyre type checker
			
 
				+.pyre/
			
 
				+
			
 
				+# pytype static type analyzer
			
 
				+.pytype/
			
 
				+
			
 
				+# Cython debug symbols
			
 
				+cython_debug/
			
 
				+
			
 
				+# PyCharm
			
 
				+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
			
 
				+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
			
 
				+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
			
 
				+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
			
 
				+#.idea/
			
--- a/python/onnxocr/Dockerfile
+++ b/python/onnxocr/Dockerfile
@@ -0,0 +1,26 @@
 
				# Base image: Python 3.7 (slim variant)
				FROM python:3.7-slim

				# Working directory inside the image
				WORKDIR /app

				# Copy requirements.txt first so the dependency layer can be cached
				COPY requirements.txt .

				# Install Python dependencies from the Tsinghua PyPI mirror
				RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

				# Install system libraries (presumably the runtime dependencies of OpenCV - confirm).
				# Update and install run in one layer so a stale cached package index is never reused, every
				# package is installed with -y because `docker build` has no interactive stdin to confirm the
				# prompt, and the apt lists are removed to keep the layer small.
				RUN apt-get update \
				    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
				    && rm -rf /var/lib/apt/lists/*

				# Copy the whole project into the image
				COPY . .

				# Environment variables (if needed)
				ENV PYTHONUNBUFFERED=1

				# Service port (assumes the Flask service listens on 5005)
				EXPOSE 5005

				# Start the Flask service
				CMD ["python", "app-service.py"]
			
--- a/python/onnxocr/LICENSE
+++ b/python/onnxocr/LICENSE
@@ -0,0 +1,201 @@
 
				+                                 Apache License
			
 
				+                           Version 2.0, January 2004
			
 
				+                        http://www.apache.org/licenses/
			
 
				+
			
 
				+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
			
 
				+
			
 
				+   1. Definitions.
			
 
				+
			
 
				+      "License" shall mean the terms and conditions for use, reproduction,
			
 
				+      and distribution as defined by Sections 1 through 9 of this document.
			
 
				+
			
 
				+      "Licensor" shall mean the copyright owner or entity authorized by
			
 
				+      the copyright owner that is granting the License.
			
 
				+
			
 
				+      "Legal Entity" shall mean the union of the acting entity and all
			
 
				+      other entities that control, are controlled by, or are under common
			
 
				+      control with that entity. For the purposes of this definition,
			
 
				+      "control" means (i) the power, direct or indirect, to cause the
			
 
				+      direction or management of such entity, whether by contract or
			
 
				+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
			
 
				+      outstanding shares, or (iii) beneficial ownership of such entity.
			
 
				+
			
 
				+      "You" (or "Your") shall mean an individual or Legal Entity
			
 
				+      exercising permissions granted by this License.
			
 
				+
			
 
				+      "Source" form shall mean the preferred form for making modifications,
			
 
				+      including but not limited to software source code, documentation
			
 
				+      source, and configuration files.
			
 
				+
			
 
				+      "Object" form shall mean any form resulting from mechanical
			
 
				+      transformation or translation of a Source form, including but
			
 
				+      not limited to compiled object code, generated documentation,
			
 
				+      and conversions to other media types.
			
 
				+
			
 
				+      "Work" shall mean the work of authorship, whether in Source or
			
 
				+      Object form, made available under the License, as indicated by a
			
 
				+      copyright notice that is included in or attached to the work
			
 
				+      (an example is provided in the Appendix below).
			
 
				+
			
 
				+      "Derivative Works" shall mean any work, whether in Source or Object
			
 
				+      form, that is based on (or derived from) the Work and for which the
			
 
				+      editorial revisions, annotations, elaborations, or other modifications
			
 
				+      represent, as a whole, an original work of authorship. For the purposes
			
 
				+      of this License, Derivative Works shall not include works that remain
			
 
				+      separable from, or merely link (or bind by name) to the interfaces of,
			
 
				+      the Work and Derivative Works thereof.
			
 
				+
			
 
				+      "Contribution" shall mean any work of authorship, including
			
 
				+      the original version of the Work and any modifications or additions
			
 
				+      to that Work or Derivative Works thereof, that is intentionally
			
 
				+      submitted to Licensor for inclusion in the Work by the copyright owner
			
 
				+      or by an individual or Legal Entity authorized to submit on behalf of
			
 
				+      the copyright owner. For the purposes of this definition, "submitted"
			
 
				+      means any form of electronic, verbal, or written communication sent
			
 
				+      to the Licensor or its representatives, including but not limited to
			
 
				+      communication on electronic mailing lists, source code control systems,
			
 
				+      and issue tracking systems that are managed by, or on behalf of, the
			
 
				+      Licensor for the purpose of discussing and improving the Work, but
			
 
				+      excluding communication that is conspicuously marked or otherwise
			
 
				+      designated in writing by the copyright owner as "Not a Contribution."
			
 
				+
			
 
				+      "Contributor" shall mean Licensor and any individual or Legal Entity
			
 
				+      on behalf of whom a Contribution has been received by Licensor and
			
 
				+      subsequently incorporated within the Work.
			
 
				+
			
 
				+   2. Grant of Copyright License. Subject to the terms and conditions of
			
 
				+      this License, each Contributor hereby grants to You a perpetual,
			
 
				+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
			
 
				+      copyright license to reproduce, prepare Derivative Works of,
			
 
				+      publicly display, publicly perform, sublicense, and distribute the
			
 
				+      Work and such Derivative Works in Source or Object form.
			
 
				+
			
 
				+   3. Grant of Patent License. Subject to the terms and conditions of
			
 
				+      this License, each Contributor hereby grants to You a perpetual,
			
 
				+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
			
 
				+      (except as stated in this section) patent license to make, have made,
			
 
				+      use, offer to sell, sell, import, and otherwise transfer the Work,
			
 
				+      where such license applies only to those patent claims licensable
			
 
				+      by such Contributor that are necessarily infringed by their
			
 
				+      Contribution(s) alone or by combination of their Contribution(s)
			
 
				+      with the Work to which such Contribution(s) was submitted. If You
			
 
				+      institute patent litigation against any entity (including a
			
 
				+      cross-claim or counterclaim in a lawsuit) alleging that the Work
			
 
				+      or a Contribution incorporated within the Work constitutes direct
			
 
				+      or contributory patent infringement, then any patent licenses
			
 
				+      granted to You under this License for that Work shall terminate
			
 
				+      as of the date such litigation is filed.
			
 
				+
			
 
				+   4. Redistribution. You may reproduce and distribute copies of the
			
 
				+      Work or Derivative Works thereof in any medium, with or without
			
 
				+      modifications, and in Source or Object form, provided that You
			
 
				+      meet the following conditions:
			
 
				+
			
 
				+      (a) You must give any other recipients of the Work or
			
 
				+          Derivative Works a copy of this License; and
			
 
				+
			
 
				+      (b) You must cause any modified files to carry prominent notices
			
 
				+          stating that You changed the files; and
			
 
				+
			
 
				+      (c) You must retain, in the Source form of any Derivative Works
			
 
				+          that You distribute, all copyright, patent, trademark, and
			
 
				+          attribution notices from the Source form of the Work,
			
 
				+          excluding those notices that do not pertain to any part of
			
 
				+          the Derivative Works; and
			
 
				+
			
 
				+      (d) If the Work includes a "NOTICE" text file as part of its
			
 
				+          distribution, then any Derivative Works that You distribute must
			
 
				+          include a readable copy of the attribution notices contained
			
 
				+          within such NOTICE file, excluding those notices that do not
			
 
				+          pertain to any part of the Derivative Works, in at least one
			
 
				+          of the following places: within a NOTICE text file distributed
			
 
				+          as part of the Derivative Works; within the Source form or
			
 
				+          documentation, if provided along with the Derivative Works; or,
			
 
				+          within a display generated by the Derivative Works, if and
			
 
				+          wherever such third-party notices normally appear. The contents
			
 
				+          of the NOTICE file are for informational purposes only and
			
 
				+          do not modify the License. You may add Your own attribution
			
 
				+          notices within Derivative Works that You distribute, alongside
			
 
				+          or as an addendum to the NOTICE text from the Work, provided
			
 
				+          that such additional attribution notices cannot be construed
			
 
				+          as modifying the License.
			
 
				+
			
 
				+      You may add Your own copyright statement to Your modifications and
			
 
				+      may provide additional or different license terms and conditions
			
 
				+      for use, reproduction, or distribution of Your modifications, or
			
 
				+      for any such Derivative Works as a whole, provided Your use,
			
 
				+      reproduction, and distribution of the Work otherwise complies with
			
 
				+      the conditions stated in this License.
			
 
				+
			
 
				+   5. Submission of Contributions. Unless You explicitly state otherwise,
			
 
				+      any Contribution intentionally submitted for inclusion in the Work
			
 
				+      by You to the Licensor shall be under the terms and conditions of
			
 
				+      this License, without any additional terms or conditions.
			
 
				+      Notwithstanding the above, nothing herein shall supersede or modify
			
 
				+      the terms of any separate license agreement you may have executed
			
 
				+      with Licensor regarding such Contributions.
			
 
				+
			
 
				+   6. Trademarks. This License does not grant permission to use the trade
			
 
				+      names, trademarks, service marks, or product names of the Licensor,
			
 
				+      except as required for reasonable and customary use in describing the
			
 
				+      origin of the Work and reproducing the content of the NOTICE file.
			
 
				+
			
 
				+   7. Disclaimer of Warranty. Unless required by applicable law or
			
 
				+      agreed to in writing, Licensor provides the Work (and each
			
 
				+      Contributor provides its Contributions) on an "AS IS" BASIS,
			
 
				+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
			
 
				+      implied, including, without limitation, any warranties or conditions
			
 
				+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
			
 
				+      PARTICULAR PURPOSE. You are solely responsible for determining the
			
 
				+      appropriateness of using or redistributing the Work and assume any
			
 
				+      risks associated with Your exercise of permissions under this License.
			
 
				+
			
 
				+   8. Limitation of Liability. In no event and under no legal theory,
			
 
				+      whether in tort (including negligence), contract, or otherwise,
			
 
				+      unless required by applicable law (such as deliberate and grossly
			
 
				+      negligent acts) or agreed to in writing, shall any Contributor be
			
 
				+      liable to You for damages, including any direct, indirect, special,
			
 
				+      incidental, or consequential damages of any character arising as a
			
 
				+      result of this License or out of the use or inability to use the
			
 
				+      Work (including but not limited to damages for loss of goodwill,
			
 
				+      work stoppage, computer failure or malfunction, or any and all
			
 
				+      other commercial damages or losses), even if such Contributor
			
 
				+      has been advised of the possibility of such damages.
			
 
				+
			
 
				+   9. Accepting Warranty or Additional Liability. While redistributing
			
 
				+      the Work or Derivative Works thereof, You may choose to offer,
			
 
				+      and charge a fee for, acceptance of support, warranty, indemnity,
			
 
				+      or other liability obligations and/or rights consistent with this
			
 
				+      License. However, in accepting such obligations, You may act only
			
 
				+      on Your own behalf and on Your sole responsibility, not on behalf
			
 
				+      of any other Contributor, and only if You agree to indemnify,
			
 
				+      defend, and hold each Contributor harmless for any liability
			
 
				+      incurred by, or claims asserted against, such Contributor by reason
			
 
				+      of your accepting any such warranty or additional liability.
			
 
				+
			
 
				+   END OF TERMS AND CONDITIONS
			
 
				+
			
 
				+   APPENDIX: How to apply the Apache License to your work.
			
 
				+
			
 
				+      To apply the Apache License to your work, attach the following
			
 
				+      boilerplate notice, with the fields enclosed by brackets "[]"
			
 
				+      replaced with your own identifying information. (Don't include
			
 
				+      the brackets!)  The text should be enclosed in the appropriate
			
 
				+      comment syntax for the file format. We also recommend that a
			
 
				+      file or class name and description of purpose be included on the
			
 
				+      same "printed page" as the copyright notice for easier
			
 
				+      identification within third-party archives.
			
 
				+
			
 
				+   Copyright [yyyy] [name of copyright owner]
			
 
				+
			
 
				+   Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+   you may not use this file except in compliance with the License.
			
 
				+   You may obtain a copy of the License at
			
 
				+
			
 
				+       http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+   Unless required by applicable law or agreed to in writing, software
			
 
				+   distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+   See the License for the specific language governing permissions and
			
 
				+   limitations under the License.
			
--- a/python/onnxocr/Readme.md
+++ b/python/onnxocr/Readme.md
@@ -0,0 +1,163 @@
 
				+
			
 
				+English | [简体中文](./Readme_cn.md) |
			
 
				+
			
 
				+### **OnnxOCR**  
			
 
				+### ![onnx_logo](onnxocr/test_images/onnxocr_logo.png)  
			
 
				+
			
 
				+**A High-Performance Multilingual OCR Engine Based on ONNX**  
			
 
				+
			
 
				+[![GitHub Stars](https://img.shields.io/github/stars/jingsongliujing/OnnxOCR?style=social&label=Star&maxAge=3600)](https://github.com/jingsongliujing/OnnxOCR/stargazers)  
			
 
				+[![GitHub Forks](https://img.shields.io/github/forks/jingsongliujing/OnnxOCR?style=social&label=Fork&maxAge=3600)](https://github.com/jingsongliujing/OnnxOCR/network/members)  
			
 
				+[![GitHub License](https://img.shields.io/github/license/jingsongliujing/OnnxOCR)](https://github.com/jingsongliujing/OnnxOCR/blob/main/LICENSE)  
			
 
				+[![Python Version](https://img.shields.io/badge/Python-%E2%89%A53.6-blue.svg)](https://www.python.org/)  
			
 
				+
			
 
				+
			
 
				+## 🚀 Version Updates  
			
 
				+- **2025.05.21**  
			
 
				+  1. Added PP-OCRv5 model, supporting 5 language types in a single model: Simplified Chinese, Traditional Chinese, Chinese Pinyin, English, and Japanese.  
			
 
				+  2. Overall recognition accuracy improved by 13 percentage points compared to PP-OCRv4.  
			
 
				+  3. Accuracy is consistent with PaddleOCR 3.0.  
			
 
				+
			
 
				+
			
 
				+## 🌟 Core Advantages  
			
 
				+1. **Deep Learning Framework-Free**: A universal OCR engine ready for direct deployment.  
			
 
				+2. **Cross-Architecture Support**: Uses PaddleOCR-converted ONNX models, rebuilt for deployment on both ARM and x86 architecture computers with unchanged accuracy under limited computing power.  
			
 
				+3. **High-Performance Inference**: Faster inference on hardware of equivalent performance.  
			
 
				+4. **Multilingual Support**: Single model supports 5 language types: Simplified Chinese, Traditional Chinese, Chinese Pinyin, English, and Japanese.  
			
 
				+5. **Model Accuracy**: Consistent with PaddleOCR models.  
			
 
				+6. **Domestic Hardware Adaptation**: Restructured code architecture for easy adaptation to more domestic GPUs by modifying only the inference engine.  
			
 
				+
			
 
				+
			
 
				+## 🛠️ Environment Setup  
			
 
				+```bash  
			
 
				+python>=3.6  
			
 
				+
			
 
				+pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt  
			
 
				+```  
			
 
				+
			
 
				+**Note**:  
			
 
				+- The Mobile version model is used by default; the PP-OCRv5_Server-ONNX model gives better recognition results.  
			
 
				+- The Mobile model is already in `onnxocr/models/ppocrv5` and requires no download;  
			
 
				+- The PP-OCRv5_Server-ONNX model is large and uploaded to [Baidu Netdisk](https://pan.baidu.com/s/1hpENH_SkLDdwXkmlsX0GUQ?pwd=wu8t) (extraction code: wu8t). After downloading, place the `det` and `rec` models in `./models/ppocrv5/` to replace the existing ones.  
			
 
				+
			
 
				+
			
 
				+## 🚀 One-Click Run  
			
 
				+```bash  
			
 
				+python test_ocr.py  
			
 
				+```  
			
 
				+
			
 
				+
			
 
				+## 📡 API Service (CPU Example)  
			
 
				+### Start Service  
			
 
				+```bash  
			
 
				+python app-service.py  
			
 
				+```  
			
 
				+
			
 
				+### Test Example  
			
 
				+#### Request  
			
 
				+```bash  
			
 
				+curl -X POST http://localhost:5005/ocr \  
			
 
				+-H "Content-Type: application/json" \  
			
 
				+-d '{"image": "base64_encoded_image_data"}'  
			
 
				+```  
			
 
				+
			
 
				+#### Response  
			
 
				+```json  
			
 
				+{  
			
 
				+  "processing_time": 0.456,  
			
 
				+  "results": [  
			
 
				+    {  
			
 
				+      "text": "Name",  
			
 
				+      "confidence": 0.9999361634254456,  
			
 
				+      "bounding_box": [[4.0, 8.0], [31.0, 8.0], [31.0, 24.0], [4.0, 24.0]]  
			
 
				+    },  
			
 
				+    {  
			
 
				+      "text": "Header",  
			
 
				+      "confidence": 0.9998759031295776,  
			
 
				+      "bounding_box": [[233.0, 7.0], [258.0, 7.0], [258.0, 23.0], [233.0, 23.0]]  
			
 
				+    }  
			
 
				+  ]  
			
 
				+}  
			
 
				+```  
			
 
				+
			
 
				+
			
 
				+## 🐳 Docker Image Environment (CPU)  
			
 
				+### Build Image  
			
 
				+```bash  
			
 
				+docker build -t onnxocr-service:v3 .  
			
 
				+```  
			
 
				+
			
 
				+### Run Image  
			
 
				+```bash  
			
 
				+docker run -itd --name onnxocr-service-v3 -p 5006:5005 onnxocr-service:v3  
			
 
				+```  
			
 
				+
			
 
				+### POST Request  
			
 
				+```  
			
 
				+url: ip:5006/ocr  
			
 
				+```  
			
 
				+
			
 
				+### Response Example  
			
 
				+```json  
			
 
				+{  
			
 
				+  "processing_time": 0.456,  
			
 
				+  "results": [  
			
 
				+    {  
			
 
				+      "text": "Name",  
			
 
				+      "confidence": 0.9999361634254456,  
			
 
				+      "bounding_box": [[4.0, 8.0], [31.0, 8.0], [31.0, 24.0], [4.0, 24.0]]  
			
 
				+    },  
			
 
				+    {  
			
 
				+      "text": "Header",  
			
 
				+      "confidence": 0.9998759031295776,  
			
 
				+      "bounding_box": [[233.0, 7.0], [258.0, 7.0], [258.0, 23.0], [233.0, 23.0]]  
			
 
				+    }  
			
 
				+  ]  
			
 
				+}  
			
 
				+```  
			
 
				+
			
 
				+
			
 
				+## 🌟 Effect Demonstration  
			
 
				+| Example 1 | Example 2 |  
			
 
				+|-----------|-----------|  
			
 
				+| ![](result_img/r1.png) | ![](result_img/r2.png) |  
			
 
				+
			
 
				+| Example 3 | Example 4 |  
			
 
				+|-----------|-----------|  
			
 
				+| ![](result_img/r3.png) | ![](result_img/draw_ocr4.jpg) |  
			
 
				+
			
 
				+| Example 5 | Example 6 |  
			
 
				+|-----------|-----------|  
			
 
				+| ![](result_img/draw_ocr5.jpg) | ![](result_img/555.png) |  
			
 
				+
			
 
				+
			
 
				+## 👨‍💻 Contact & Communication  
			
 
				+### Career Opportunities  
			
 
				+I am currently seeking job opportunities. Feel free to get in touch!  
			
 
				+![WeChat QR Code](onnxocr/test_images/myQR.jpg)  
			
 
				+
			
 
				+### OnnxOCR Community  
			
 
				+#### WeChat Group  
			
 
				+![WeChat Group](onnxocr/test_images/微信群.jpg)  
			
 
				+
			
 
				+#### QQ Group  
			
 
				+![QQ Group](onnxocr/test_images/QQ群.jpg)  
			
 
				+
			
 
				+
			
 
				+## 🎉 Acknowledgments  
			
 
				+Thanks to [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) for technical support!  
			
 
				+
			
 
				+
			
 
				+## 🌍 Open Source & Donations  
			
 
				+I am passionate about open source and AI technology, believing they can bring convenience and help to those in need, making the world a better place. If you recognize this project, you can support it via Alipay or WeChat Pay (please note "Support OnnxOCR" in the remarks).  
			
 
				+
			
 
				+<img src="onnxocr/test_images/weixin_pay.jpg" alt="WeChat Pay" width="200">
			
 
				+<img src="onnxocr/test_images/zhifubao_pay.jpg" alt="Alipay" width="200">
			
 
				+
			
 
				+
			
 
				+## 📈 Star History  
			
 
				+[![Star History Chart](https://api.star-history.com/svg?repos=jingsongliujing/OnnxOCR&type=Date)](https://star-history.com/#jingsongliujing/OnnxOCR&Date)  
			
 
				+
			
 
				+
			
 
				+## 🤝 Contribution Guidelines  
			
 
				+Issues and Pull Requests are welcome. Let's improve the project together!  
			
--- a/python/onnxocr/Readme_cn.md
+++ b/python/onnxocr/Readme_cn.md
@@ -0,0 +1,163 @@
 
				+如果项目对您有帮助，欢迎点击右上角 **Star** 支持！✨  
			
 
				+### **OnnxOCR**  
			
 
				+### ![onnx_logo](onnxocr/test_images/onnxocr_logo.png)  
			
 
				+
			
 
				+**基于 ONNX 的高性能多语言 OCR 引擎**  
			
 
				+![GitHub stars](https://img.shields.io/github/stars/jingsongliujing/OnnxOCR?style=social)  
			
 
				+![GitHub forks](https://img.shields.io/github/forks/jingsongliujing/OnnxOCR?style=social)  
			
 
				+![GitHub license](https://img.shields.io/github/license/jingsongliujing/OnnxOCR)  
			
 
				+![Python Version](https://img.shields.io/badge/python-≥3.6-blue.svg)  
			
 
				+
			
 
				+
			
 
				+## 🚀 版本更新  
			
 
				+- **2025.05.21**  
			
 
				+  1. 新增 PP-OCRv5 模型，单模型支持 5 种文字类型：简体中文、繁体中文、中文拼音、英文和日文。  
			
 
				+  2. 整体识别精度相比 PP-OCRv4 提升 13 个百分点。
			
 
				+  3. 精度与 PaddleOCR 3.0 保持一致。
			
 
				+
			
 
				+
			
 
				+## 🌟 核心优势  
			
 
				+1. **脱离深度学习训练框架**：可直接用于部署的通用 OCR。  
			
 
				+2. **跨架构支持**：在算力有限、精度不变的情况下，使用 PaddleOCR 转成 ONNX 模型，重新构建的可部署在 ARM 架构和 x86 架构计算机上的 OCR 模型。  
			
 
				+3. **高性能推理**：在同样性能的计算机上推理速度加速。  
			
 
				+4. **多语言支持**：单模型支持 5 种文字类型：简体中文、繁体中文、中文拼音、英文和日文。  
			
 
				+5. **模型精度**：与 PaddleOCR 模型保持一致。
			
 
				+6. **国产化适配**：重构代码工程架构，只需简单进行推理引擎的修改，即可适配更多国产化显卡。
			
 
				+
			
 
				+
			
 
				+
			
 
				+## 🛠️ 环境安装  
			
 
				+```bash  
			
 
				+python>=3.6  
			
 
				+
			
 
				+pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt  
			
 
				+```  
			
 
				+
			
 
				+**注意**：  
			
 
				+- 默认使用 Mobile 版本模型，使用 PP-OCRv5_Server-ONNX 模型效果更佳。  
			
 
				+- Mobile 模型已存在于 `onnxocr/models/ppocrv5` 下，无需下载；  
			
 
				+- PP-OCRv5_Server-ONNX 模型过大，已上传至 [百度网盘](https://pan.baidu.com/s/1hpENH_SkLDdwXkmlsX0GUQ?pwd=wu8t)（提取码: wu8t），下载后将 `det` 和 `rec` 模型放到 `./models/ppocrv5/` 下替换即可。  
			
 
				+
			
 
				+
			
 
				+## 🚀 一键运行  
			
 
				+```bash  
			
 
				+python test_ocr.py  
			
 
				+```  
			
 
				+
			
 
				+
			
 
				+## 📡 API 服务（CPU 示例）  
			
 
				+### 启动服务  
			
 
				+```bash  
			
 
				+python app-service.py  
			
 
				+```  
			
 
				+
			
 
				+### 测试示例  
			
 
				+#### 请求  
			
 
				+```bash  
			
 
				+curl -X POST http://localhost:5005/ocr \  
			
 
				+-H "Content-Type: application/json" \  
			
 
				+-d '{"image": "base64_encoded_image_data"}'  
			
 
				+```  
			
 
				+
			
 
				+#### 响应  
			
 
				+```json  
			
 
				+{  
			
 
				+  "processing_time": 0.456,  
			
 
				+  "results": [  
			
 
				+    {  
			
 
				+      "text": "名称",  
			
 
				+      "confidence": 0.9999361634254456,  
			
 
				+      "bounding_box": [[4.0, 8.0], [31.0, 8.0], [31.0, 24.0], [4.0, 24.0]]  
			
 
				+    },  
			
 
				+    {  
			
 
				+      "text": "标头",  
			
 
				+      "confidence": 0.9998759031295776,  
			
 
				+      "bounding_box": [[233.0, 7.0], [258.0, 7.0], [258.0, 23.0], [233.0, 23.0]]  
			
 
				+    }  
			
 
				+  ]  
			
 
				+}  
			
 
				+```  
			
 
				+
			
 
				+
			
 
				+## 🐳 Docker 镜像环境（CPU）  
			
 
				+### 镜像构建  
			
 
				+```bash  
			
 
				+docker build -t onnxocr-service:v3 .  
			
 
				+```  
			
 
				+
			
 
				+### 镜像启动  
			
 
				+```bash  
			
 
				+docker run -itd --name onnxocr-service-v3 -p 5006:5005 onnxocr-service:v3  
			
 
				+```  
			
 
				+
			
 
				+### POST 请求  
			
 
				+```  
			
 
				+url: ip:5006/ocr  
			
 
				+```  
			
 
				+
			
 
				+### 返回值示例  
			
 
				+```json  
			
 
				+{  
			
 
				+  "processing_time": 0.456,  
			
 
				+  "results": [  
			
 
				+    {  
			
 
				+      "text": "名称",  
			
 
				+      "confidence": 0.9999361634254456,  
			
 
				+      "bounding_box": [[4.0, 8.0], [31.0, 8.0], [31.0, 24.0], [4.0, 24.0]]  
			
 
				+    },  
			
 
				+    {  
			
 
				+      "text": "标头",  
			
 
				+      "confidence": 0.9998759031295776,  
			
 
				+      "bounding_box": [[233.0, 7.0], [258.0, 7.0], [258.0, 23.0], [233.0, 23.0]]  
			
 
				+    }  
			
 
				+  ]  
			
 
				+}  
			
 
				+```  
			
 
				+
			
 
				+
			
 
				+## 🌟 效果展示  
			
 
				+| 示例 1 | 示例 2 |  
			
 
				+|--------|--------|  
			
 
				+| ![](result_img/r1.png) | ![](result_img/r2.png) |  
			
 
				+
			
 
				+| 示例 3 | 示例 4 |  
			
 
				+|--------|--------|  
			
 
				+| ![](result_img/r3.png) | ![](result_img/draw_ocr4.jpg) |  
			
 
				+
			
 
				+| 示例 5 | 示例 6 |  
			
 
				+|--------|--------|  
			
 
				+| ![](result_img/draw_ocr5.jpg) | ![](result_img/555.png) |  
			
 
				+
			
 
				+
			
 
				+## 👨‍💻 联系与交流  
			
 
				+### 求职信息  
			
 
				+本人正在寻求工作机会，欢迎联系！  
			
 
				+![微信二维码](onnxocr/test_images/myQR.jpg)  
			
 
				+
			
 
				+### OnnxOCR 交流群  
			
 
				+#### 微信群  
			
 
				+![微信群](onnxocr/test_images/微信群.jpg)  
			
 
				+
			
 
				+#### QQ 群  
			
 
				+![QQ群](onnxocr/test_images/QQ群.jpg)  
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+## 🎉 致谢  
			
 
				+感谢 [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) 提供的技术支持！  
			
 
				+
			
 
				+
			
 
				+## 🌍 开源与捐赠  
			
 
				+我热爱开源和 AI 技术，相信它们能为有需要的人带来便利和帮助，让世界变得更美好。如果您认可本项目，可以通过支付宝或微信进行打赏（备注请注明支持 OnnxOCR）。  
			
 
				+
			
 
				+<img src="onnxocr/test_images/weixin_pay.jpg" alt="微信支付" width="200">
			
 
				+<img src="onnxocr/test_images/zhifubao_pay.jpg" alt="支付宝" width="200">
			
 
				+
			
 
				+
			
 
				+## 📈 Star 历史  
			
 
				+[![Star History Chart](https://api.star-history.com/svg?repos=jingsongliujing/OnnxOCR&type=Date)](https://star-history.com/#jingsongliujing/OnnxOCR&Date)  
			
 
				+
			
 
				+
			
 
				+## 🤝 贡献指南  
			
 
				+欢迎提交 Issues 和 Pull Requests，共同改进项目！  
			
--- a/python/onnxocr/app-service.py
+++ b/python/onnxocr/app-service.py
@@ -0,0 +1,72 @@
 
				+import cv2
			
 
				+import time
			
 
				+import base64
			
 
				+import numpy as np
			
 
				+from flask import Flask, request, jsonify,render_template
			
 
				+from onnxocr.onnx_paddleocr import ONNXPaddleOcr
			
 
				+
			
 
				+# 初始化 Flask 应用
			
 
				+app = Flask(__name__)
			
 
				+
			
 
				+# 初始化 OCR 模型
			
 
				+model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)
			
 
				+
			
 
				+@app.route('/')
			
 
				+def index():
			
 
				+    return render_template('index.html')
			
 
				+
			
 
				+@app.route('/ocr', methods=['POST'])
			
 
				+def ocr_service():
			
 
				+    try:
			
 
				+        # 获取请求数据
			
 
				+        data = request.get_json()
			
 
				+        if not data or "image" not in data:
			
 
				+            return jsonify({"error": "Invalid request, 'image' field is required."}), 400
			
 
				+
			
 
				+        # 解码 base64 图像
			
 
				+        image_base64 = data["image"]
			
 
				+        try:
			
 
				+            image_bytes = base64.b64decode(image_base64)
			
 
				+            image_np = np.frombuffer(image_bytes, dtype=np.uint8)
			
 
				+            img = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
			
 
				+            if img is None:
			
 
				+                return jsonify({"error": "Failed to decode image from base64."}), 400
			
 
				+        except Exception as e:
			
 
				+            return jsonify({"error": f"Image decoding failed: {str(e)}"}), 400
			
 
				+
			
 
				+        # 执行 OCR
			
 
				+        start_time = time.time()
			
 
				+        result = model.ocr(img)
			
 
				+        end_time = time.time()
			
 
				+        processing_time = end_time - start_time
			
 
				+
			
 
				+        # 格式化结果
			
 
				+        ocr_results = []
			
 
				+        for line in result[0]:
			
 
				+            # 确保 line[0] 是 NumPy 数组或列表
			
 
				+            if isinstance(line[0], (list, np.ndarray)):
			
 
				+                # 将 bounding_box 转换为 [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] 格式
			
 
				+                bounding_box = np.array(line[0]).reshape(4, 2).tolist()  # 转换为 4x2 列表
			
 
				+            else:
			
 
				+                bounding_box = []
			
 
				+
			
 
				+            ocr_results.append({
			
 
				+                "text": line[1][0],  # 识别文本
			
 
				+                "confidence": float(line[1][1]),  # 置信度
			
 
				+                "bounding_box": bounding_box  # 文本框坐标
			
 
				+            })
			
 
				+
			
 
				+        # 返回结果
			
 
				+        return jsonify({
			
 
				+            "processing_time": processing_time,
			
 
				+            "results": ocr_results
			
 
				+        })
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        # 捕获所有异常并返回错误信息
			
 
				+        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    # 启动 Flask 服务
			
 
				+    app.run(host="0.0.0.0", port=5005, debug=False)
			
--- a/python/onnxocr/draw_ocr.jpg
+++ b/python/onnxocr/draw_ocr.jpg
--- a/python/onnxocr/onnxocr/__init__.py
+++ b/python/onnxocr/onnxocr/__init__.py
--- a/python/onnxocr/onnxocr/cls_postprocess.py
+++ b/python/onnxocr/onnxocr/cls_postprocess.py
@@ -0,0 +1,30 @@
 
				+
			
 
				+# import paddle
			
 
				+
			
 
				+
			
 
				+class ClsPostProcess(object):
			
 
				+    """ Convert between text-label and text-index """
			
 
				+
			
 
				+    def __init__(self, label_list=None, key=None, **kwargs):
			
 
				+        super(ClsPostProcess, self).__init__()
			
 
				+        self.label_list = label_list
			
 
				+        self.key = key
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        if self.key is not None:
			
 
				+            preds = preds[self.key]
			
 
				+
			
 
				+        label_list = self.label_list
			
 
				+        if label_list is None:
			
 
				+            label_list = {idx: idx for idx in range(preds.shape[-1])}
			
 
				+
			
 
				+        # if isinstance(preds, paddle.Tensor):
			
 
				+        #     preds = preds.numpy()
			
 
				+
			
 
				+        pred_idxs = preds.argmax(axis=1)
			
 
				+        decode_out = [(label_list[idx], preds[i, idx])
			
 
				+                      for i, idx in enumerate(pred_idxs)]
			
 
				+        if label is None:
			
 
				+            return decode_out
			
 
				+        label = [(label_list[idx], 1.0) for idx in label]
			
 
				+        return decode_out, label
			
--- a/python/onnxocr/onnxocr/db_postprocess.py
+++ b/python/onnxocr/onnxocr/db_postprocess.py
@@ -0,0 +1,276 @@
 
				+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+"""
			
 
				+This code is referenced from:
			
 
				+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py
			
 
				+"""
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import numpy as np
			
 
				+import cv2
			
 
				+# import paddle
			
 
				+from shapely.geometry import Polygon
			
 
				+import pyclipper
			
 
				+
			
 
				+
			
 
				+class DBPostProcess(object):
			
 
				+    """
			
 
				+    The post process for Differentiable Binarization (DB).
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 thresh=0.3,
			
 
				+                 box_thresh=0.7,
			
 
				+                 max_candidates=1000,
			
 
				+                 unclip_ratio=2.0,
			
 
				+                 use_dilation=False,
			
 
				+                 score_mode="fast",
			
 
				+                 box_type='quad',
			
 
				+                 **kwargs):
			
 
				+        self.thresh = thresh
			
 
				+        self.box_thresh = box_thresh
			
 
				+        self.max_candidates = max_candidates
			
 
				+        self.unclip_ratio = unclip_ratio
			
 
				+        self.min_size = 3
			
 
				+        self.score_mode = score_mode
			
 
				+        self.box_type = box_type
			
 
				+        assert score_mode in [
			
 
				+            "slow", "fast"
			
 
				+        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)
			
 
				+
			
 
				+        self.dilation_kernel = None if not use_dilation else np.array(
			
 
				+            [[1, 1], [1, 1]])
			
 
				+
			
 
				+    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
			
 
				+        '''
			
 
				+        _bitmap: single map with shape (1, H, W),
			
 
				+            whose values are binarized as {0, 1}
			
 
				+        '''
			
 
				+
			
 
				+        bitmap = _bitmap
			
 
				+        height, width = bitmap.shape
			
 
				+
			
 
				+        boxes = []
			
 
				+        scores = []
			
 
				+
			
 
				+        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
			
 
				+                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
			
 
				+
			
 
				+        for contour in contours[:self.max_candidates]:
			
 
				+            epsilon = 0.002 * cv2.arcLength(contour, True)
			
 
				+            approx = cv2.approxPolyDP(contour, epsilon, True)
			
 
				+            points = approx.reshape((-1, 2))
			
 
				+            if points.shape[0] < 4:
			
 
				+                continue
			
 
				+
			
 
				+            score = self.box_score_fast(pred, points.reshape(-1, 2))
			
 
				+            if self.box_thresh > score:
			
 
				+                continue
			
 
				+
			
 
				+            if points.shape[0] > 2:
			
 
				+                box = self.unclip(points, self.unclip_ratio)
			
 
				+                if len(box) > 1:
			
 
				+                    continue
			
 
				+            else:
			
 
				+                continue
			
 
				+            box = box.reshape(-1, 2)
			
 
				+
			
 
				+            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
			
 
				+            if sside < self.min_size + 2:
			
 
				+                continue
			
 
				+
			
 
				+            box = np.array(box)
			
 
				+            box[:, 0] = np.clip(
			
 
				+                np.round(box[:, 0] / width * dest_width), 0, dest_width)
			
 
				+            box[:, 1] = np.clip(
			
 
				+                np.round(box[:, 1] / height * dest_height), 0, dest_height)
			
 
				+            boxes.append(box.tolist())
			
 
				+            scores.append(score)
			
 
				+        return boxes, scores
			
 
				+
			
 
				+    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
			
 
				+        '''
			
 
				+        _bitmap: single map with shape (1, H, W),
			
 
				+                whose values are binarized as {0, 1}
			
 
				+        '''
			
 
				+
			
 
				+        bitmap = _bitmap
			
 
				+        height, width = bitmap.shape
			
 
				+
			
 
				+        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
			
 
				+                                cv2.CHAIN_APPROX_SIMPLE)
			
 
				+        if len(outs) == 3:
			
 
				+            img, contours, _ = outs[0], outs[1], outs[2]
			
 
				+        elif len(outs) == 2:
			
 
				+            contours, _ = outs[0], outs[1]
			
 
				+
			
 
				+        num_contours = min(len(contours), self.max_candidates)
			
 
				+
			
 
				+        boxes = []
			
 
				+        scores = []
			
 
				+        for index in range(num_contours):
			
 
				+            contour = contours[index]
			
 
				+            points, sside = self.get_mini_boxes(contour)
			
 
				+            if sside < self.min_size:
			
 
				+                continue
			
 
				+            points = np.array(points)
			
 
				+            if self.score_mode == "fast":
			
 
				+                score = self.box_score_fast(pred, points.reshape(-1, 2))
			
 
				+            else:
			
 
				+                score = self.box_score_slow(pred, contour)
			
 
				+            if self.box_thresh > score:
			
 
				+                continue
			
 
				+
			
 
				+            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)
			
 
				+            box, sside = self.get_mini_boxes(box)
			
 
				+            if sside < self.min_size + 2:
			
 
				+                continue
			
 
				+            box = np.array(box)
			
 
				+
			
 
				+            box[:, 0] = np.clip(
			
 
				+                np.round(box[:, 0] / width * dest_width), 0, dest_width)
			
 
				+            box[:, 1] = np.clip(
			
 
				+                np.round(box[:, 1] / height * dest_height), 0, dest_height)
			
 
				+            boxes.append(box.astype("int32"))
			
 
				+            scores.append(score)
			
 
				+        return np.array(boxes, dtype="int32"), scores
			
 
				+
			
 
				+    def unclip(self, box, unclip_ratio):
			
 
				+        poly = Polygon(box)
			
 
				+        distance = poly.area * unclip_ratio / poly.length
			
 
				+        offset = pyclipper.PyclipperOffset()
			
 
				+        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
			
 
				+        expanded = np.array(offset.Execute(distance))
			
 
				+        return expanded
			
 
				+
			
 
				+    def get_mini_boxes(self, contour):
			
 
				+        bounding_box = cv2.minAreaRect(contour)
			
 
				+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
			
 
				+
			
 
				+        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
			
 
				+        if points[1][1] > points[0][1]:
			
 
				+            index_1 = 0
			
 
				+            index_4 = 1
			
 
				+        else:
			
 
				+            index_1 = 1
			
 
				+            index_4 = 0
			
 
				+        if points[3][1] > points[2][1]:
			
 
				+            index_2 = 2
			
 
				+            index_3 = 3
			
 
				+        else:
			
 
				+            index_2 = 3
			
 
				+            index_3 = 2
			
 
				+
			
 
				+        box = [
			
 
				+            points[index_1], points[index_2], points[index_3], points[index_4]
			
 
				+        ]
			
 
				+        return box, min(bounding_box[1])
			
 
				+
			
 
    def box_score_fast(self, bitmap, _box):
        '''
        box_score_fast: mean of ``bitmap`` inside the polygon ``_box``,
        evaluated only on the polygon's axis-aligned bounding window.

        ``_box`` is copied, so the caller's array is left untouched.
        '''
        height, width = bitmap.shape[:2]
        pts = _box.copy()
        xs, ys = pts[:, 0], pts[:, 1]

        # Bounding window, clamped to the bitmap extent.
        left = np.clip(np.floor(xs.min()).astype("int32"), 0, width - 1)
        right = np.clip(np.ceil(xs.max()).astype("int32"), 0, width - 1)
        top = np.clip(np.floor(ys.min()).astype("int32"), 0, height - 1)
        bottom = np.clip(np.ceil(ys.max()).astype("int32"), 0, height - 1)

        region_mask = np.zeros((bottom - top + 1, right - left + 1), dtype=np.uint8)
        # Shift the polygon into window-local coordinates before rasterising.
        pts[:, 0] = xs - left
        pts[:, 1] = ys - top
        cv2.fillPoly(region_mask, pts.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[top:bottom + 1, left:right + 1], region_mask)[0]
			
 
				+
			
 
    def box_score_slow(self, bitmap, contour):
        '''
        box_score_slow: mean of ``bitmap`` inside the polygon described by
        ``contour`` (reshaped to N x 2), evaluated on its bounding window.

        ``contour`` is copied, so the caller's array is left untouched.
        '''
        height, width = bitmap.shape[:2]
        pts = np.reshape(contour.copy(), (-1, 2))
        xs, ys = pts[:, 0], pts[:, 1]

        # Bounding window, clamped to the bitmap extent.
        left = np.clip(np.min(xs), 0, width - 1)
        right = np.clip(np.max(xs), 0, width - 1)
        top = np.clip(np.min(ys), 0, height - 1)
        bottom = np.clip(np.max(ys), 0, height - 1)

        region_mask = np.zeros((bottom - top + 1, right - left + 1), dtype=np.uint8)

        # Shift the polygon into window-local coordinates before rasterising.
        pts[:, 0] = xs - left
        pts[:, 1] = ys - top

        cv2.fillPoly(region_mask, pts.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[top:bottom + 1, left:right + 1], region_mask)[0]
			
 
				+
			
 
    def __call__(self, outs_dict, shape_list):
        """Turn the network output into one box list per batch item.

        Args:
            outs_dict: mapping whose ``'maps'`` entry is indexed as
                ``[:, 0, :, :]`` (channel 0 of a 4-D array).
            shape_list: per-item sequence unpacked as
                ``(src_h, src_w, ratio_h, ratio_w)``.

        Returns:
            list of ``{'points': boxes}`` dicts, one per batch item.

        Raises:
            ValueError: if ``self.box_type`` is neither 'quad' nor 'poly'.
        """
        # NOTE: the input is used as-is; no tensor-to-numpy conversion is done here.
        prob_maps = outs_dict['maps'][:, 0, :, :]
        binary_maps = prob_maps > self.thresh

        results = []
        for item in range(prob_maps.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[item]

            if self.dilation_kernel is None:
                mask = binary_maps[item]
            else:
                mask = cv2.dilate(
                    np.array(binary_maps[item]).astype(np.uint8),
                    self.dilation_kernel)

            if self.box_type == 'poly':
                boxes, scores = self.polygons_from_bitmap(
                    prob_maps[item], mask, src_w, src_h)
            elif self.box_type == 'quad':
                boxes, scores = self.boxes_from_bitmap(
                    prob_maps[item], mask, src_w, src_h)
            else:
                raise ValueError("box_type can only be one of ['quad', 'poly']")

            results.append({'points': boxes})
        return results
			
 
				+
			
 
				+
			
 
				+class DistillationDBPostProcess(object):
			
 
				+    def __init__(self,
			
 
				+                 model_name=["student"],
			
 
				+                 key=None,
			
 
				+                 thresh=0.3,
			
 
				+                 box_thresh=0.6,
			
 
				+                 max_candidates=1000,
			
 
				+                 unclip_ratio=1.5,
			
 
				+                 use_dilation=False,
			
 
				+                 score_mode="fast",
			
 
				+                 box_type='quad',
			
 
				+                 **kwargs):
			
 
				+        self.model_name = model_name
			
 
				+        self.key = key
			
 
				+        self.post_process = DBPostProcess(
			
 
				+            thresh=thresh,
			
 
				+            box_thresh=box_thresh,
			
 
				+            max_candidates=max_candidates,
			
 
				+            unclip_ratio=unclip_ratio,
			
 
				+            use_dilation=use_dilation,
			
 
				+            score_mode=score_mode,
			
 
				+            box_type=box_type)
			
 
				+
			
 
				+    def __call__(self, predicts, shape_list):
			
 
				+        results = {}
			
 
				+        for k in self.model_name:
			
 
				+            results[k] = self.post_process(predicts[k], shape_list=shape_list)
			
 
				+        return results
			
--- a/python/onnxocr/onnxocr/fonts/simfang.ttf
+++ b/python/onnxocr/onnxocr/fonts/simfang.ttf
--- a/python/onnxocr/onnxocr/imaug.py
+++ b/python/onnxocr/onnxocr/imaug.py
@@ -0,0 +1,32 @@
 
				+from .operators import *
			
 
				+
			
 
				+
			
 
				+def transform(data, ops=None):
			
 
				+    """transform"""
			
 
				+    if ops is None:
			
 
				+        ops = []
			
 
				+    for op in ops:
			
 
				+        data = op(data)
			
 
				+        if data is None:
			
 
				+            return None
			
 
				+    return data
			
 
				+
			
 
				+
			
 
				+def create_operators(op_param_list, global_config=None):
			
 
				+    """
			
 
				+    create operators based on the config
			
 
				+
			
 
				+    Args:
			
 
				+        params(list): a dict list, used to create some operators
			
 
				+    """
			
 
				+    assert isinstance(op_param_list, list), "operator config should be a list"
			
 
				+    ops = []
			
 
				+    for operator in op_param_list:
			
 
				+        assert isinstance(operator, dict) and len(operator) == 1, "yaml format error"
			
 
				+        op_name = list(operator)[0]
			
 
				+        param = {} if operator[op_name] is None else operator[op_name]
			
 
				+        if global_config is not None:
			
 
				+            param.update(global_config)
			
 
				+        op = eval(op_name)(**param)
			
 
				+        ops.append(op)
			
 
				+    return ops
			
--- a/python/onnxocr/onnxocr/logger.py
+++ b/python/onnxocr/onnxocr/logger.py
@@ -0,0 +1,45 @@
 
				+import logging
			
 
				+
			
 
				+LogName = 'Umi-OCR_log'
			
 
				+LogFileName = 'Umi-OCR_debug.log'
			
 
				+
			
 
				+
			
 
				+class Logger:
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        self.initLogger()
			
 
				+
			
 
				+    def initLogger(self):
			
 
				+        '''初始化日志'''
			
 
				+
			
 
				+        # 日志
			
 
				+        self.logger = logging.getLogger(LogName)
			
 
				+        self.logger.setLevel(logging.DEBUG)
			
 
				+
			
 
				+        # 控制台
			
 
				+        streamHandler = logging.StreamHandler()
			
 
				+        streamHandler.setLevel(logging.DEBUG)
			
 
				+        formatPrint = logging.Formatter(
			
 
				+            '【%(levelname)s】 %(message)s')
			
 
				+        streamHandler.setFormatter(formatPrint)
			
 
				+        # self.logger.addHandler(streamHandler)
			
 
				+
			
 
				+        return
			
 
				+        # 日志文件
			
 
				+        fileHandler = logging.FileHandler(LogFileName)
			
 
				+        fileHandler.setLevel(logging.ERROR)
			
 
				+        formatFile = logging.Formatter(
			
 
				+            '''
			
 
				+【%(levelname)s】 %(asctime)s
			
 
				+%(message)s
			
 
				+    文件：%(module)s | 函数：%(funcName)s | 行号：%(lineno)d
			
 
				+    线程id：%(thread)d | 线程名：%(thread)s''')
			
 
				+        fileHandler.setFormatter(formatFile)
			
 
				+        self.logger.addHandler(fileHandler)
			
 
				+
			
 
				+
			
 
				+LOG = Logger()
			
 
				+
			
 
				+
			
 
				+def GetLog():
			
 
				+    return LOG.logger
			
--- a/python/onnxocr/onnxocr/models/ch_ppocr_server_v2.0/cls/cls.onnx
+++ b/python/onnxocr/onnxocr/models/ch_ppocr_server_v2.0/cls/cls.onnx
--- a/python/onnxocr/onnxocr/models/ch_ppocr_server_v2.0/det/det.onnx
+++ b/python/onnxocr/onnxocr/models/ch_ppocr_server_v2.0/det/det.onnx
--- a/python/onnxocr/onnxocr/models/ppocrv4/cls/cls.onnx
+++ b/python/onnxocr/onnxocr/models/ppocrv4/cls/cls.onnx
--- a/python/onnxocr/onnxocr/models/ppocrv4/det/det.onnx
+++ b/python/onnxocr/onnxocr/models/ppocrv4/det/det.onnx
--- a/python/onnxocr/onnxocr/models/ppocrv4/rec/rec.onnx
+++ b/python/onnxocr/onnxocr/models/ppocrv4/rec/rec.onnx
--- a/python/onnxocr/onnxocr/models/ppocrv5/cls/cls.onnx
+++ b/python/onnxocr/onnxocr/models/ppocrv5/cls/cls.onnx
--- a/python/onnxocr/onnxocr/models/ppocrv5/det/det.onnx
+++ b/python/onnxocr/onnxocr/models/ppocrv5/det/det.onnx
--- a/python/onnxocr/onnxocr/models/ppocrv5/rec/rec.onnx
+++ b/python/onnxocr/onnxocr/models/ppocrv5/rec/rec.onnx
--- a/python/onnxocr/onnxocr/ocr_images_pdfs.py
+++ b/python/onnxocr/onnxocr/ocr_images_pdfs.py
@@ -0,0 +1,268 @@
 
				+# logic.py
			
 
				+import sys
			
 
				+import os
			
 
				+# 添加父目录到sys.path，便于导入onnxocr包
			
 
				+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
			
 
				+from onnxocr.onnx_paddleocr import ONNXPaddleOcr, sav2Img
			
 
				+import cv2
			
 
				+from typing import List, Callable
			
 
				+from pathlib import Path
			
 
				+import time
			
 
				+import numpy as np
			
 
				+
			
 
				+# 尝试导入pdf2image用于PDF转图片
			
 
				+try:
			
 
				+    from pdf2image import convert_from_path
			
 
				+except ImportError:
			
 
				+    convert_from_path = None
			
 
				+
			
 
				+# 尝试导入pymupdf用于PDF转图片
			
 
				+try:
			
 
				+    import fitz  # pymupdf
			
 
				+    def pdf_to_images(pdf_path, dpi=200):
			
 
				+        """
			
 
				+        使用pymupdf将PDF每一页转为图片（numpy数组）
			
 
				+        """
			
 
				+        doc = fitz.open(pdf_path)
			
 
				+        images = []
			
 
				+        for page in doc:
			
 
				+            pix = page.get_pixmap(dpi=dpi)
			
 
				+            img = np.frombuffer(pix.samples, dtype=np.uint8)
			
 
				+            img = img.reshape((pix.height, pix.width, pix.n))
			
 
				+            if pix.n == 4:
			
 
				+                img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
			
 
				+            images.append(img)
			
 
				+        return images
			
 
				+except ImportError:
			
 
				+    pdf_to_images = None
			
 
				+
			
 
				+class OCRLogic:
			
 
				+    """
			
 
				+    OCR 业务逻辑主类，支持批量图片/PDF识别，多线程加速，模型热切换等
			
 
				+    """
			
 
				+    def __init__(self, status_callback: Callable[[str], None]):
			
 
				+        """
			
 
				+        初始化，传入状态回调函数用于UI进度提示
			
 
				+        """
			
 
				+        self.status_callback = status_callback
			
 
				+        # 默认初始化OCR模型
			
 
				+        self.model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)
			
 
				+
			
 
				+    def run(self, files: List[str], save_txt: bool, merge_txt: bool, output_img: bool = False, file_time_callback=None, pdf_progress_callback=None, max_workers: int = 4):
			
 
				+        """
			
 
				+        批量图片/PDF识别主入口，支持多线程加速
			
 
				+        files: 待识别文件路径列表
			
 
				+        save_txt: 是否保存txt
			
 
				+        merge_txt: 是否合并为一个txt
			
 
				+        output_img: 是否输出带框图片
			
 
				+        file_time_callback: 单文件识别耗时回调
			
 
				+        pdf_progress_callback: PDF页进度回调
			
 
				+        max_workers: 最大线程数，默认4
			
 
				+        """
			
 
				+        import concurrent.futures
			
 
				+        start_time = time.time()
			
 
				+        all_text = [None] * len(files)  # 用于顺序合并结果
			
 
				+        def process_one(idx_file):
			
 
				+            idx, file = idx_file
			
 
				+            ext = os.path.splitext(file)[1].lower()
			
 
				+            self.status_callback(f"正在处理: {os.path.basename(file)} ({idx+1}/{len(files)})")
			
 
				+            t0 = time.time()
			
 
				+            text = ""
			
 
				+            if ext == ".pdf":
			
 
				+                # PDF转图片后识别
			
 
				+                if pdf_to_images is None:
			
 
				+                    raise RuntimeError("未安装pymupdf库，无法处理PDF文件。请先安装pymupdf。")
			
 
				+                images = pdf_to_images(file, dpi=300)
			
 
				+                text = self._ocr_images(images, file, save_txt, merge_txt, output_img=output_img, is_pdf=True, pdf_progress_callback=pdf_progress_callback, max_workers=max_workers)
			
 
				+            else:
			
 
				+                # 普通图片识别，兼容中文路径
			
 
				+                try:
			
 
				+                    if file.lower().endswith('.bmp'):
			
 
				+                        img = cv2.imdecode(np.fromfile(file, dtype=np.uint8), cv2.IMREAD_COLOR)
			
 
				+                    else:
			
 
				+                        with open(file, 'rb') as fimg:
			
 
				+                            img_array = np.frombuffer(fimg.read(), np.uint8)
			
 
				+                        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
			
 
				+                except Exception as e:
			
 
				+                    self.status_callback(f"图片读取失败: {file}，错误: {e}")
			
 
				+                    if file_time_callback:
			
 
				+                        file_time_callback(idx, 0)
			
 
				+                    return (idx, "")
			
 
				+                if img is None:
			
 
				+                    self.status_callback(f"文件无法读取或不是有效图片: {file}")
			
 
				+                    if file_time_callback:
			
 
				+                        file_time_callback(idx, 0)
			
 
				+                    return (idx, "")
			
 
				+                text = self._ocr_image(img, file, save_txt, output_img=output_img)
			
 
				+            t1 = time.time()
			
 
				+            if file_time_callback:
			
 
				+                file_time_callback(idx, t1-t0)
			
 
				+            self.status_callback(f"{os.path.basename(file)} 识别用时: {t1-t0:.2f} 秒")
			
 
				+            if len(files) > 1:
			
 
				+                avg = (t1 - start_time) / (idx + 1)
			
 
				+                self.status_callback(f"已完成 {idx+1}/{len(files)}，平均单张用时: {avg:.2f} 秒")
			
 
				+            return (idx, text)
			
 
				+        # 多线程处理所有文件，结果按索引回填，保证顺序
			
 
				+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
			
 
				+            futures = [executor.submit(process_one, (idx, file)) for idx, file in enumerate(files)]
			
 
				+            for future in concurrent.futures.as_completed(futures):
			
 
				+                idx, text = future.result()
			
 
				+                all_text[idx] = text
			
 
				+        # 合并写入txt
			
 
				+        if save_txt and merge_txt and len(files) > 1:
			
 
				+            out_dir = self._get_output_dir(files[0])
			
 
				+            timestamp = time.strftime("%Y%m%d_%H%M%S")
			
 
				+            out_txt = os.path.join(out_dir, f"merged_ocr_{timestamp}.txt")
			
 
				+            with open(out_txt, "w", encoding="utf-8") as f:
			
 
				+                for text in all_text:
			
 
				+                    if text:
			
 
				+                        f.write(text)
			
 
				+                        f.write("\n\n")
			
 
				+        elapsed = time.time() - start_time
			
 
				+        if files:
			
 
				+            out_dir = self._get_output_dir(files[0])
			
 
				+            self.status_callback(f"识别完成，总耗时：{elapsed:.2f}秒，文件保存在：{out_dir}")
			
 
				+        else:
			
 
				+            self.status_callback(f"识别完成，总耗时：{elapsed:.2f}秒")
			
 
				+
			
 
				+    def _ocr_images(self, images, pdf_path, save_txt, merge_txt, output_img=False, is_pdf=False, pdf_progress_callback=None, max_workers: int = 4):
			
 
				+        """
			
 
				+        PDF转图片后，批量图片识别，支持多线程加速
			
 
				+        images: PDF每页图片（numpy数组）
			
 
				+        pdf_path: 原PDF路径
			
 
				+        save_txt: 是否保存txt
			
 
				+        merge_txt: 是否合并txt（未用）
			
 
				+        output_img: 是否输出带框图片
			
 
				+        pdf_progress_callback: 页进度回调
			
 
				+        max_workers: 最大线程数，默认4
			
 
				+        """
			
 
				+        import concurrent.futures
			
 
				+        out_dir = self._get_output_dir(pdf_path)
			
 
				+        pdf_text = [None] * len(images)
			
 
				+        timestamp = time.strftime("%Y%m%d_%H%M%S")
			
 
				+        total = len(images)
			
 
				+        def process_page(i_img):
			
 
				+            i, img = i_img
			
 
				+            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
			
 
				+            result = self.model.ocr(img_cv)
			
 
				+            if output_img:
			
 
				+                out_img_path = os.path.join(out_dir, f"{Path(pdf_path).stem}_page{i+1}_ocr.jpg")
			
 
				+                sav2Img(img_cv, result, name=out_img_path)
			
 
				+            page_text = self._result_to_text(result)
			
 
				+            return (i, page_text)
			
 
				+        # 多线程识别每一页，结果按页码顺序合并
			
 
				+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
			
 
				+            futures = [executor.submit(process_page, (i, img)) for i, img in enumerate(images)]
			
 
				+            for future in concurrent.futures.as_completed(futures):
			
 
				+                i, page_text = future.result()
			
 
				+                pdf_text[i] = page_text
			
 
				+                if pdf_progress_callback:
			
 
				+                    pdf_progress_callback(i + 1, total)
			
 
				+        if save_txt:
			
 
				+            txt_path = os.path.join(out_dir, f"{Path(pdf_path).stem}_ocr_{timestamp}.txt")
			
 
				+            with open(txt_path, "w", encoding="utf-8") as f:
			
 
				+                f.write("\n\n".join(pdf_text))
			
 
				+        return "\n\n".join(pdf_text)
			
 
				+
			
 
				+    def _ocr_image(self, img, img_path, save_txt, output_img=False):
			
 
				+        """
			
 
				+        单张图片OCR识别，支持保存txt和输出带框图片
			
 
				+        """
			
 
				+        out_dir = self._get_output_dir(img_path)
			
 
				+        result = self.model.ocr(img)
			
 
				+        if output_img:
			
 
				+            out_img_path = os.path.join(out_dir, f"{Path(img_path).stem}_ocr.jpg")
			
 
				+            sav2Img(img, result, name=out_img_path)
			
 
				+        text = self._result_to_text(result)
			
 
				+        if save_txt:
			
 
				+            timestamp = time.strftime("%Y%m%d_%H%M%S")
			
 
				+            txt_path = os.path.join(out_dir, f"{Path(img_path).stem}_ocr_{timestamp}.txt")
			
 
				+            with open(txt_path, "w", encoding="utf-8") as f:
			
 
				+                f.write(text)
			
 
				+        return text
			
 
				+
			
 
				+    def _result_to_text(self, result):
			
 
				+        """
			
 
				+        将OCR识别结果结构化为纯文本，兼容只检测无识别内容的情况
			
 
				+        """
			
 
				+        # 健壮性检查，防止result为空或结构异常
			
 
				+        if not result or not isinstance(result, list) or not result[0] or not isinstance(result[0], list):
			
 
				+            return "[未检测到内容]"
			
 
				+        lines = []
			
 
				+        for box in result[0]:
			
 
				+            # 兼容只检测无识别内容的情况
			
 
				+            if isinstance(box, list) and len(box) == 2 and isinstance(box[1], (list, tuple)) and len(box[1]) >= 1:
			
 
				+                lines.append(str(box[1][0]))
			
 
				+            elif isinstance(box, list) and (isinstance(box[0], (list, tuple)) or isinstance(box[0], float)):
			
 
				+                # 只有检测框，无识别内容
			
 
				+                lines.append("[未识别] " + str(box))
			
 
				+            else:
			
 
				+                lines.append(str(box))
			
 
				+        return "\n".join(lines)
			
 
				+
			
 
				+    def _get_output_dir(self, file_path):
			
 
				+        """
			
 
				+        获取输出目录，自动创建
			
 
				+        """
			
 
				+        base_dir = os.path.dirname(file_path)
			
 
				+        out_dir = os.path.join(base_dir, "Output_OCR")
			
 
				+        os.makedirs(out_dir, exist_ok=True)
			
 
				+        return out_dir
			
 
				+
			
 
				+    def set_model(self, model_name, use_gpu=False):
			
 
				+        """
			
 
				+        切换OCR模型，支持多模型热切换，所有模型统一用ppocrv5字典
			
 
				+        use_gpu: 是否启用GPU
			
 
				+        """
			
 
				+        import os
			
 
				+        import tkinter.messagebox as messagebox
			
 
				+        base_model_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "onnxocr", "models"))
			
 
				+        model_map = {
			
 
				+            "PP-OCRv5": "ppocrv5",
			
 
				+            "PP-OCRv4": "ppocrv4",
			
 
				+            "ch_ppocr_server_v2.0": "ch_ppocr_server_v2.0"
			
 
				+        }
			
 
				+        model_dir = model_map.get(model_name, "ppocrv5")
			
 
				+        model_path = os.path.join(base_model_dir, model_dir)
			
 
				+        det_model_dir = os.path.join(model_path, "det", "det.onnx")
			
 
				+        cls_model_dir = os.path.join(model_path, "cls", "cls.onnx")
			
 
				+        rec_char_dict_path = os.path.join(base_model_dir, "ppocrv5", "ppocrv5_dict.txt")
			
 
				+        rec_model_dir = os.path.join(model_path, "rec", "rec.onnx") if os.path.exists(os.path.join(model_path, "rec", "rec.onnx")) else None
			
 
				+        ocr_kwargs = dict(
			
 
				+            use_angle_cls=True,
			
 
				+            use_gpu=use_gpu,  # 关键：传递GPU参数
			
 
				+            det_model_dir=det_model_dir,
			
 
				+            cls_model_dir=cls_model_dir,
			
 
				+            rec_char_dict_path=rec_char_dict_path
			
 
				+        )
			
 
				+        if rec_model_dir and os.path.exists(rec_model_dir):
			
 
				+            ocr_kwargs["rec_model_dir"] = rec_model_dir
			
 
				+        try:
			
 
				+            self.model = ONNXPaddleOcr(**ocr_kwargs)
			
 
				+            if use_gpu:
			
 
				+                try:
			
 
				+                    import onnxruntime as ort
			
 
				+                    providers = self.model.session.get_providers() if hasattr(self.model, 'session') else []
			
 
				+                    if not any('CUDA' in p for p in providers):
			
 
				+                        msg = ("未检测到可用GPU，已自动切换为CPU推理。请检查CUDA/cuDNN环境配置。")
			
 
				+                        if hasattr(self, 'ui_ref') and hasattr(self.ui_ref, 'update_gpu_status'):
			
 
				+                            self.ui_ref.update_gpu_status(msg)
			
 
				+                        if hasattr(self, 'status_callback'):
			
 
				+                            self.status_callback("[警告] 未检测到可用GPU，已切换为CPU推理。请检查CUDA/cuDNN环境配置。")
			
 
				+                except Exception:
			
 
				+                    msg = ("检测GPU状态时发生异常，可能未正确安装CUDA/cuDNN或onnxruntime-gpu。已自动切换为CPU推理。")
			
 
				+                    if hasattr(self, 'ui_ref') and hasattr(self.ui_ref, 'update_gpu_status'):
			
 
				+                        self.ui_ref.update_gpu_status(msg)
			
 
				+                    if hasattr(self, 'status_callback'):
			
 
				+                        self.status_callback("[警告] GPU检测异常，已切换为CPU推理。请检查CUDA/cuDNN环境配置。")
			
 
				+        except Exception as e:
			
 
				+            if use_gpu:
			
 
				+                msg = f"GPU初始化失败，已自动切换为CPU。请检查CUDA/cuDNN环境配置。错误信息: {e}"
			
 
				+                if hasattr(self, 'ui_ref') and hasattr(self.ui_ref, 'update_gpu_status'):
			
 
				+                    self.ui_ref.update_gpu_status(msg)
			
 
				+                if hasattr(self, 'status_callback'):
			
 
				+                    self.status_callback("[警告] GPU初始化失败，已切换为CPU推理。请检查CUDA/cuDNN环境配置。")
			
 
				+                ocr_kwargs["use_gpu"] = False
			
 
				+                self.model = ONNXPaddleOcr(**ocr_kwargs)
			
 
				+            else:
			
 
				+                raise
			
--- a/python/onnxocr/onnxocr/onnx_paddleocr.py
+++ b/python/onnxocr/onnxocr/onnx_paddleocr.py
@@ -0,0 +1,96 @@
 
				+import time
			
 
				+
			
 
				+from .predict_system import TextSystem
			
 
				+from .utils import infer_args as init_args
			
 
				+from .utils import str2bool, draw_ocr
			
 
				+import argparse
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+class ONNXPaddleOcr(TextSystem):
			
 
				+    def __init__(self, **kwargs):
			
 
				+        # 默认参数
			
 
				+        parser = init_args()
			
 
				+        inference_args_dict = {}
			
 
				+        for action in parser._actions:
			
 
				+            inference_args_dict[action.dest] = action.default
			
 
				+        params = argparse.Namespace(**inference_args_dict)
			
 
				+
			
 
				+        # params.rec_image_shape = "3, 32, 320"
			
 
				+        params.rec_image_shape = "3, 48, 320"
			
 
				+
			
 
				+        # 根据传入的参数覆盖更新默认参数
			
 
				+        params.__dict__.update(**kwargs)
			
 
				+
			
 
				+        # 初始化模型
			
 
				+        super().__init__(params)
			
 
				+
			
 
				+    def ocr(self, img, det=True, rec=True, cls=True):
			
 
				+        if cls == True and self.use_angle_cls == False:
			
 
				+            print(
			
 
				+                "Since the angle classifier is not initialized, the angle classifier will not be used during the forward process"
			
 
				+            )
			
 
				+
			
 
				+        if det and rec:
			
 
				+            ocr_res = []
			
 
				+            dt_boxes, rec_res = self.__call__(img, cls)
			
 
				+            tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
			
 
				+            ocr_res.append(tmp_res)
			
 
				+            return ocr_res
			
 
				+        elif det and not rec:
			
 
				+            ocr_res = []
			
 
				+            dt_boxes = self.text_detector(img)
			
 
				+            tmp_res = [box.tolist() for box in dt_boxes]
			
 
				+            ocr_res.append(tmp_res)
			
 
				+            return ocr_res
			
 
				+        else:
			
 
				+            ocr_res = []
			
 
				+            cls_res = []
			
 
				+
			
 
				+            if not isinstance(img, list):
			
 
				+                img = [img]
			
 
				+            if self.use_angle_cls and cls:
			
 
				+                img, cls_res_tmp = self.text_classifier(img)
			
 
				+                if not rec:
			
 
				+                    cls_res.append(cls_res_tmp)
			
 
				+            rec_res = self.text_recognizer(img)
			
 
				+            ocr_res.append(rec_res)
			
 
				+
			
 
				+            if not rec:
			
 
				+                return cls_res
			
 
				+            return ocr_res
			
 
				+
			
 
				+
			
 
				+def sav2Img(org_img, result, name="draw_ocr.jpg"):
			
 
				+    # 显示结果
			
 
				+    from PIL import Image
			
 
				+
			
 
				+    result = result[0]
			
 
				+    # image = Image.open(img_path).convert('RGB')
			
 
				+    # 图像转BGR2RGB
			
 
				+    image = org_img[:, :, ::-1]
			
 
				+    boxes = [line[0] for line in result]
			
 
				+    txts = [line[1][0] for line in result]
			
 
				+    scores = [line[1][1] for line in result]
			
 
				+    im_show = draw_ocr(image, boxes, txts, scores)
			
 
				+    im_show = Image.fromarray(im_show)
			
 
				+    im_show.save(name)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    import cv2
			
 
				+
			
 
				+    model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)
			
 
				+
			
 
				+    img = cv2.imread(
			
 
				+        "/data2/liujingsong3/fiber_box/test/img/20230531230052008263304.jpg"
			
 
				+    )
			
 
				+    s = time.time()
			
 
				+    result = model.ocr(img)
			
 
				+    e = time.time()
			
 
				+    print("total time: {:.3f}".format(e - s))
			
 
				+    print("result:", result)
			
 
				+    for box in result[0]:
			
 
				+        print(box)
			
 
				+
			
 
				+    sav2Img(img, result)
			
--- a/python/onnxocr/onnxocr/operators.py
+++ b/python/onnxocr/onnxocr/operators.py
@@ -0,0 +1,187 @@
 
				+import numpy as np
			
 
				+import cv2
			
 
				+import sys
			
 
				+import math
			
 
				+
			
 
				+
			
 
				+class NormalizeImage(object):
			
 
				+    """ normalize image such as substract mean, divide std
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
			
 
				+        if isinstance(scale, str):
			
 
				+            scale = eval(scale)
			
 
				+        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
			
 
				+        mean = mean if mean is not None else [0.485, 0.456, 0.406]
			
 
				+        std = std if std is not None else [0.229, 0.224, 0.225]
			
 
				+
			
 
				+        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
			
 
				+        self.mean = np.array(mean).reshape(shape).astype('float32')
			
 
				+        self.std = np.array(std).reshape(shape).astype('float32')
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        img = data['image']
			
 
				+        from PIL import Image
			
 
				+        if isinstance(img, Image.Image):
			
 
				+            img = np.array(img)
			
 
				+        assert isinstance(img,
			
 
				+                          np.ndarray), "invalid input 'img' in NormalizeImage"
			
 
				+        data['image'] = (
			
 
				+            img.astype('float32') * self.scale - self.mean) / self.std
			
 
				+        return data
			
 
				+
			
 
				+
			
 
				+class DetResizeForTest(object):
			
 
				+    def __init__(self, **kwargs):
			
 
				+        super(DetResizeForTest, self).__init__()
			
 
				+        self.resize_type = 0
			
 
				+        self.keep_ratio = False
			
 
				+        if 'image_shape' in kwargs:
			
 
				+            self.image_shape = kwargs['image_shape']
			
 
				+            self.resize_type = 1
			
 
				+            if 'keep_ratio' in kwargs:
			
 
				+                self.keep_ratio = kwargs['keep_ratio']
			
 
				+        elif 'limit_side_len' in kwargs:
			
 
				+            self.limit_side_len = kwargs['limit_side_len']
			
 
				+            self.limit_type = kwargs.get('limit_type', 'min')
			
 
				+        elif 'resize_long' in kwargs:
			
 
				+            self.resize_type = 2
			
 
				+            self.resize_long = kwargs.get('resize_long', 960)
			
 
				+        else:
			
 
				+            self.limit_side_len = 736
			
 
				+            self.limit_type = 'min'
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        img = data['image']
			
 
				+        src_h, src_w, _ = img.shape
			
 
				+        if sum([src_h, src_w]) < 64:
			
 
				+            img = self.image_padding(img)
			
 
				+
			
 
				+        if self.resize_type == 0:
			
 
				+            # img, shape = self.resize_image_type0(img)
			
 
				+            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
			
 
				+        elif self.resize_type == 2:
			
 
				+            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
			
 
				+        else:
			
 
				+            # img, shape = self.resize_image_type1(img)
			
 
				+            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
			
 
				+        data['image'] = img
			
 
				+        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
			
 
				+        return data
			
 
				+
			
 
				+    def image_padding(self, im, value=0):
			
 
				+        h, w, c = im.shape
			
 
				+        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
			
 
				+        im_pad[:h, :w, :] = im
			
 
				+        return im_pad
			
 
				+
			
 
				+    def resize_image_type1(self, img):
			
 
				+        resize_h, resize_w = self.image_shape
			
 
				+        ori_h, ori_w = img.shape[:2]  # (h, w, c)
			
 
				+        if self.keep_ratio is True:
			
 
				+            resize_w = ori_w * resize_h / ori_h
			
 
				+            N = math.ceil(resize_w / 32)
			
 
				+            resize_w = N * 32
			
 
				+        ratio_h = float(resize_h) / ori_h
			
 
				+        ratio_w = float(resize_w) / ori_w
			
 
				+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
			
 
				+        # return img, np.array([ori_h, ori_w])
			
 
				+        return img, [ratio_h, ratio_w]
			
 
				+
			
 
				+    def resize_image_type0(self, img):
			
 
				+        """
			
 
				+        resize image to a size multiple of 32 which is required by the network
			
 
				+        args:
			
 
				+            img(array): array with shape [h, w, c]
			
 
				+        return(tuple):
			
 
				+            img, (ratio_h, ratio_w)
			
 
				+        """
			
 
				+        limit_side_len = self.limit_side_len
			
 
				+        h, w, c = img.shape
			
 
				+
			
 
				+        # limit the max side
			
 
				+        if self.limit_type == 'max':
			
 
				+            if max(h, w) > limit_side_len:
			
 
				+                if h > w:
			
 
				+                    ratio = float(limit_side_len) / h
			
 
				+                else:
			
 
				+                    ratio = float(limit_side_len) / w
			
 
				+            else:
			
 
				+                ratio = 1.
			
 
				+        elif self.limit_type == 'min':
			
 
				+            if min(h, w) < limit_side_len:
			
 
				+                if h < w:
			
 
				+                    ratio = float(limit_side_len) / h
			
 
				+                else:
			
 
				+                    ratio = float(limit_side_len) / w
			
 
				+            else:
			
 
				+                ratio = 1.
			
 
				+        elif self.limit_type == 'resize_long':
			
 
				+            ratio = float(limit_side_len) / max(h, w)
			
 
				+        else:
			
 
				+            raise Exception('not support limit type, image ')
			
 
				+        resize_h = int(h * ratio)
			
 
				+        resize_w = int(w * ratio)
			
 
				+
			
 
				+        resize_h = max(int(round(resize_h / 32) * 32), 32)
			
 
				+        resize_w = max(int(round(resize_w / 32) * 32), 32)
			
 
				+
			
 
				+        try:
			
 
				+            if int(resize_w) <= 0 or int(resize_h) <= 0:
			
 
				+                return None, (None, None)
			
 
				+            img = cv2.resize(img, (int(resize_w), int(resize_h)))
			
 
				+        except:
			
 
				+            print(img.shape, resize_w, resize_h)
			
 
				+            sys.exit(0)
			
 
				+        ratio_h = resize_h / float(h)
			
 
				+        ratio_w = resize_w / float(w)
			
 
				+        return img, [ratio_h, ratio_w]
			
 
				+
			
 
				+    def resize_image_type2(self, img):
			
 
				+        h, w, _ = img.shape
			
 
				+
			
 
				+        resize_w = w
			
 
				+        resize_h = h
			
 
				+
			
 
				+        if resize_h > resize_w:
			
 
				+            ratio = float(self.resize_long) / resize_h
			
 
				+        else:
			
 
				+            ratio = float(self.resize_long) / resize_w
			
 
				+
			
 
				+        resize_h = int(resize_h * ratio)
			
 
				+        resize_w = int(resize_w * ratio)
			
 
				+
			
 
				+        max_stride = 128
			
 
				+        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
			
 
				+        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
			
 
				+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
			
 
				+        ratio_h = resize_h / float(h)
			
 
				+        ratio_w = resize_w / float(w)
			
 
				+
			
 
				+        return img, [ratio_h, ratio_w]
			
 
				+
			
 
				+class ToCHWImage(object):
			
 
				+    """ convert hwc image to chw image
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, **kwargs):
			
 
				+        pass
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        img = data['image']
			
 
				+        from PIL import Image
			
 
				+        if isinstance(img, Image.Image):
			
 
				+            img = np.array(img)
			
 
				+        data['image'] = img.transpose((2, 0, 1))
			
 
				+        return data
			
 
				+
			
 
				+
			
 
				+class KeepKeys(object):
			
 
				+    def __init__(self, keep_keys, **kwargs):
			
 
				+        self.keep_keys = keep_keys
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        data_list = []
			
 
				+        for key in self.keep_keys:
			
 
				+            data_list.append(data[key])
			
 
				+        return data_list
			
--- a/python/onnxocr/onnxocr/predict_base.py
+++ b/python/onnxocr/onnxocr/predict_base.py
@@ -0,0 +1,52 @@
 
				+import onnxruntime
			
 
				+
			
 
				+class PredictBase(object):
			
 
				+    def __init__(self):
			
 
				+        pass
			
 
				+
			
 
				+    def get_onnx_session(self, model_dir, use_gpu, gpu_id = 0):
			
 
				+        # 使用gpu
			
 
				+        if use_gpu:
			
 
				+            providers =[('CUDAExecutionProvider',{"cudnn_conv_algo_search": "DEFAULT","device_id": gpu_id}),'CPUExecutionProvider']
			
 
				+        else:
			
 
				+            providers =['CPUExecutionProvider']
			
 
				+
			
 
				+        onnx_session = onnxruntime.InferenceSession(model_dir, None,providers=providers)
			
 
				+
			
 
				+        # print("providers:", onnxruntime.get_device())
			
 
				+        return onnx_session
			
 
				+
			
 
				+
			
 
				+    def get_output_name(self, onnx_session):
			
 
				+        """
			
 
				+        output_name = onnx_session.get_outputs()[0].name
			
 
				+        :param onnx_session:
			
 
				+        :return:
			
 
				+        """
			
 
				+        output_name = []
			
 
				+        for node in onnx_session.get_outputs():
			
 
				+            output_name.append(node.name)
			
 
				+        return output_name
			
 
				+
			
 
				+    def get_input_name(self, onnx_session):
			
 
				+        """
			
 
				+        input_name = onnx_session.get_inputs()[0].name
			
 
				+        :param onnx_session:
			
 
				+        :return:
			
 
				+        """
			
 
				+        input_name = []
			
 
				+        for node in onnx_session.get_inputs():
			
 
				+            input_name.append(node.name)
			
 
				+        return input_name
			
 
				+
			
 
				+    def get_input_feed(self, input_name, image_numpy):
			
 
				+        """
			
 
				+        input_feed={self.input_name: image_numpy}
			
 
				+        :param input_name:
			
 
				+        :param image_numpy:
			
 
				+        :return:
			
 
				+        """
			
 
				+        input_feed = {}
			
 
				+        for name in input_name:
			
 
				+            input_feed[name] = image_numpy
			
 
				+        return input_feed
			
--- a/python/onnxocr/onnxocr/predict_cls.py
+++ b/python/onnxocr/onnxocr/predict_cls.py
@@ -0,0 +1,89 @@
 
				+import cv2
			
 
				+import copy
			
 
				+import numpy as np
			
 
				+import math
			
 
				+
			
 
				+from .cls_postprocess import ClsPostProcess
			
 
				+from .predict_base import PredictBase
			
 
				+
			
 
				+
			
 
				+class TextClassifier(PredictBase):
			
 
				+    def __init__(self, args):
			
 
				+        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
			
 
				+        self.cls_batch_num = args.cls_batch_num
			
 
				+        self.cls_thresh = args.cls_thresh
			
 
				+        self.postprocess_op = ClsPostProcess(label_list=args.label_list)
			
 
				+
			
 
				+        # 初始化模型
			
 
				+        self.cls_onnx_session = self.get_onnx_session(args.cls_model_dir, args.use_gpu, gpu_id = args.gpu_id)
			
 
				+        self.cls_input_name = self.get_input_name(self.cls_onnx_session)
			
 
				+        self.cls_output_name = self.get_output_name(self.cls_onnx_session)
			
 
				+
			
 
				+    def resize_norm_img(self, img):
			
 
				+        imgC, imgH, imgW = self.cls_image_shape
			
 
				+        h = img.shape[0]
			
 
				+        w = img.shape[1]
			
 
				+        ratio = w / float(h)
			
 
				+        if math.ceil(imgH * ratio) > imgW:
			
 
				+            resized_w = imgW
			
 
				+        else:
			
 
				+            resized_w = int(math.ceil(imgH * ratio))
			
 
				+        resized_image = cv2.resize(img, (resized_w, imgH))
			
 
				+        resized_image = resized_image.astype("float32")
			
 
				+        if self.cls_image_shape[0] == 1:
			
 
				+            resized_image = resized_image / 255
			
 
				+            resized_image = resized_image[np.newaxis, :]
			
 
				+        else:
			
 
				+            resized_image = resized_image.transpose((2, 0, 1)) / 255
			
 
				+        resized_image -= 0.5
			
 
				+        resized_image /= 0.5
			
 
				+        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
			
 
				+        padding_im[:, :, 0:resized_w] = resized_image
			
 
				+        return padding_im
			
 
				+
			
 
				+    def __call__(self, img_list):
			
 
				+        img_list = copy.deepcopy(img_list)
			
 
				+        img_num = len(img_list)
			
 
				+        # Calculate the aspect ratio of all text bars
			
 
				+        width_list = []
			
 
				+        for img in img_list:
			
 
				+            width_list.append(img.shape[1] / float(img.shape[0]))
			
 
				+        # Sorting can speed up the cls process
			
 
				+        indices = np.argsort(np.array(width_list))
			
 
				+
			
 
				+        cls_res = [["", 0.0]] * img_num
			
 
				+        batch_num = self.cls_batch_num
			
 
				+
			
 
				+        for beg_img_no in range(0, img_num, batch_num):
			
 
				+
			
 
				+            end_img_no = min(img_num, beg_img_no + batch_num)
			
 
				+            norm_img_batch = []
			
 
				+            max_wh_ratio = 0
			
 
				+
			
 
				+            for ino in range(beg_img_no, end_img_no):
			
 
				+                h, w = img_list[indices[ino]].shape[0:2]
			
 
				+                wh_ratio = w * 1.0 / h
			
 
				+                max_wh_ratio = max(max_wh_ratio, wh_ratio)
			
 
				+            for ino in range(beg_img_no, end_img_no):
			
 
				+                norm_img = self.resize_norm_img(img_list[indices[ino]])
			
 
				+                norm_img = norm_img[np.newaxis, :]
			
 
				+                norm_img_batch.append(norm_img)
			
 
				+            norm_img_batch = np.concatenate(norm_img_batch)
			
 
				+            norm_img_batch = norm_img_batch.copy()
			
 
				+
			
 
				+            input_feed = self.get_input_feed(self.cls_input_name, norm_img_batch)
			
 
				+            outputs = self.cls_onnx_session.run(
			
 
				+                self.cls_output_name, input_feed=input_feed
			
 
				+            )
			
 
				+
			
 
				+            prob_out = outputs[0]
			
 
				+
			
 
				+            cls_result = self.postprocess_op(prob_out)
			
 
				+            for rno in range(len(cls_result)):
			
 
				+                label, score = cls_result[rno]
			
 
				+                cls_res[indices[beg_img_no + rno]] = [label, score]
			
 
				+                if "180" in label and score > self.cls_thresh:
			
 
				+                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
			
 
				+                        img_list[indices[beg_img_no + rno]], 1
			
 
				+                    )
			
 
				+        return img_list, cls_res
			
--- a/python/onnxocr/onnxocr/predict_det.py
+++ b/python/onnxocr/onnxocr/predict_det.py
@@ -0,0 +1,120 @@
 
				+import numpy as np
			
 
				+from .imaug import transform, create_operators
			
 
				+from .db_postprocess import DBPostProcess
			
 
				+from .predict_base import PredictBase
			
 
				+
			
 
				+
			
 
				+class TextDetector(PredictBase):
			
 
				+    def __init__(self, args):
			
 
				+        self.args = args
			
 
				+        self.det_algorithm = args.det_algorithm
			
 
				+        pre_process_list = [
			
 
				+            {
			
 
				+                "DetResizeForTest": {
			
 
				+                    "limit_side_len": args.det_limit_side_len,
			
 
				+                    "limit_type": args.det_limit_type,
			
 
				+                }
			
 
				+            },
			
 
				+            {
			
 
				+                "NormalizeImage": {
			
 
				+                    "std": [0.229, 0.224, 0.225],
			
 
				+                    "mean": [0.485, 0.456, 0.406],
			
 
				+                    "scale": "1./255.",
			
 
				+                    "order": "hwc",
			
 
				+                }
			
 
				+            },
			
 
				+            {"ToCHWImage": None},
			
 
				+            {"KeepKeys": {"keep_keys": ["image", "shape"]}},
			
 
				+        ]
			
 
				+        postprocess_params = {}
			
 
				+        postprocess_params["name"] = "DBPostProcess"
			
 
				+        postprocess_params["thresh"] = args.det_db_thresh
			
 
				+        postprocess_params["box_thresh"] = args.det_db_box_thresh
			
 
				+        postprocess_params["max_candidates"] = 1000
			
 
				+        postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio
			
 
				+        postprocess_params["use_dilation"] = args.use_dilation
			
 
				+        postprocess_params["score_mode"] = args.det_db_score_mode
			
 
				+        postprocess_params["box_type"] = args.det_box_type
			
 
				+
			
 
				+        # 实例化预处理操作类
			
 
				+        self.preprocess_op = create_operators(pre_process_list)
			
 
				+        # self.postprocess_op = build_post_process(postprocess_params)
			
 
				+        # 实例化后处理操作类
			
 
				+        self.postprocess_op = DBPostProcess(**postprocess_params)
			
 
				+
			
 
				+        # 初始化模型
			
 
				+        self.det_onnx_session = self.get_onnx_session(args.det_model_dir, args.use_gpu, gpu_id = args.gpu_id)
			
 
				+        self.det_input_name = self.get_input_name(self.det_onnx_session)
			
 
				+        self.det_output_name = self.get_output_name(self.det_onnx_session)
			
 
				+
			
 
				+    def order_points_clockwise(self, pts):
			
 
				+        rect = np.zeros((4, 2), dtype="float32")
			
 
				+        s = pts.sum(axis=1)
			
 
				+        rect[0] = pts[np.argmin(s)]
			
 
				+        rect[2] = pts[np.argmax(s)]
			
 
				+        tmp = np.delete(pts, (np.argmin(s), np.argmax(s)), axis=0)
			
 
				+        diff = np.diff(np.array(tmp), axis=1)
			
 
				+        rect[1] = tmp[np.argmin(diff)]
			
 
				+        rect[3] = tmp[np.argmax(diff)]
			
 
				+        return rect
			
 
				+
			
 
				+    def clip_det_res(self, points, img_height, img_width):
			
 
				+        for pno in range(points.shape[0]):
			
 
				+            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
			
 
				+            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
			
 
				+        return points
			
 
				+
			
 
				+    def filter_tag_det_res(self, dt_boxes, image_shape):
			
 
				+        img_height, img_width = image_shape[0:2]
			
 
				+        dt_boxes_new = []
			
 
				+        for box in dt_boxes:
			
 
				+            if type(box) is list:
			
 
				+                box = np.array(box)
			
 
				+            box = self.order_points_clockwise(box)
			
 
				+            box = self.clip_det_res(box, img_height, img_width)
			
 
				+            rect_width = int(np.linalg.norm(box[0] - box[1]))
			
 
				+            rect_height = int(np.linalg.norm(box[0] - box[3]))
			
 
				+            if rect_width <= 3 or rect_height <= 3:
			
 
				+                continue
			
 
				+            dt_boxes_new.append(box)
			
 
				+        dt_boxes = np.array(dt_boxes_new)
			
 
				+        return dt_boxes
			
 
				+
			
 
				+    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
			
 
				+        img_height, img_width = image_shape[0:2]
			
 
				+        dt_boxes_new = []
			
 
				+        for box in dt_boxes:
			
 
				+            if type(box) is list:
			
 
				+                box = np.array(box)
			
 
				+            box = self.clip_det_res(box, img_height, img_width)
			
 
				+            dt_boxes_new.append(box)
			
 
				+        dt_boxes = np.array(dt_boxes_new)
			
 
				+        return dt_boxes
			
 
				+
			
 
				+    def __call__(self, img):
			
 
				+        ori_im = img.copy()
			
 
				+        data = {"image": img}
			
 
				+
			
 
				+        data = transform(data, self.preprocess_op)
			
 
				+        img, shape_list = data
			
 
				+        if img is None:
			
 
				+            return None, 0
			
 
				+        img = np.expand_dims(img, axis=0)
			
 
				+        shape_list = np.expand_dims(shape_list, axis=0)
			
 
				+        img = img.copy()
			
 
				+
			
 
				+        input_feed = self.get_input_feed(self.det_input_name, img)
			
 
				+        outputs = self.det_onnx_session.run(self.det_output_name, input_feed=input_feed)
			
 
				+
			
 
				+        preds = {}
			
 
				+        preds["maps"] = outputs[0]
			
 
				+
			
 
				+        post_result = self.postprocess_op(preds, shape_list)
			
 
				+        dt_boxes = post_result[0]["points"]
			
 
				+
			
 
				+        if self.args.det_box_type == "poly":
			
 
				+            dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
			
 
				+        else:
			
 
				+            dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
			
 
				+
			
 
				+        return dt_boxes
			
--- a/python/onnxocr/onnxocr/predict_rec.py
+++ b/python/onnxocr/onnxocr/predict_rec.py
@@ -0,0 +1,326 @@
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import math
			
 
				+from PIL import Image
			
 
				+
			
 
				+
			
 
				+from .rec_postprocess import CTCLabelDecode
			
 
				+from .predict_base import PredictBase
			
 
				+
			
 
				+
			
 
				+class TextRecognizer(PredictBase):
			
 
				+    def __init__(self, args):
			
 
				+        self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
			
 
				+        self.rec_batch_num = args.rec_batch_num
			
 
				+        self.rec_algorithm = args.rec_algorithm
			
 
				+        self.postprocess_op = CTCLabelDecode(
			
 
				+            character_dict_path=args.rec_char_dict_path,
			
 
				+            use_space_char=args.use_space_char,
			
 
				+        )
			
 
				+
			
 
				+        # 初始化模型
			
 
				+        self.rec_onnx_session = self.get_onnx_session(args.rec_model_dir, args.use_gpu, gpu_id = args.gpu_id)
			
 
				+        self.rec_input_name = self.get_input_name(self.rec_onnx_session)
			
 
				+        self.rec_output_name = self.get_output_name(self.rec_onnx_session)
			
 
				+
			
 
				+    def resize_norm_img(self, img, max_wh_ratio):
			
 
				+        imgC, imgH, imgW = self.rec_image_shape
			
 
				+        if self.rec_algorithm == "NRTR" or self.rec_algorithm == "ViTSTR":
			
 
				+            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+            # return padding_im
			
 
				+            image_pil = Image.fromarray(np.uint8(img))
			
 
				+            if self.rec_algorithm == "ViTSTR":
			
 
				+                img = image_pil.resize([imgW, imgH], Image.BICUBIC)
			
 
				+            else:
			
 
				+                img = image_pil.resize([imgW, imgH], Image.ANTIALIAS)
			
 
				+            img = np.array(img)
			
 
				+            norm_img = np.expand_dims(img, -1)
			
 
				+            norm_img = norm_img.transpose((2, 0, 1))
			
 
				+            if self.rec_algorithm == "ViTSTR":
			
 
				+                norm_img = norm_img.astype(np.float32) / 255.0
			
 
				+            else:
			
 
				+                norm_img = norm_img.astype(np.float32) / 128.0 - 1.0
			
 
				+            return norm_img
			
 
				+        elif self.rec_algorithm == "RFL":
			
 
				+            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+            resized_image = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_CUBIC)
			
 
				+            resized_image = resized_image.astype("float32")
			
 
				+            resized_image = resized_image / 255
			
 
				+            resized_image = resized_image[np.newaxis, :]
			
 
				+            resized_image -= 0.5
			
 
				+            resized_image /= 0.5
			
 
				+            return resized_image
			
 
				+
			
 
				+        assert imgC == img.shape[2]
			
 
				+        imgW = int((imgH * max_wh_ratio))
			
 
				+
			
 
				+        # w = self.rec_onnx_session.get_inputs()[0].shape[3:][0]
			
 
				+        # w = self.rec_onnx_session.get_inputs()[0].shape[3:][0]
			
 
				+        # print(w)
			
 
				+        # if w is not None and w > 0:
			
 
				+        #     imgW = w
			
 
				+
			
 
				+        h, w = img.shape[:2]
			
 
				+        ratio = w / float(h)
			
 
				+        if math.ceil(imgH * ratio) > imgW:
			
 
				+            resized_w = imgW
			
 
				+        else:
			
 
				+            resized_w = int(math.ceil(imgH * ratio))
			
 
				+        if self.rec_algorithm == "RARE":
			
 
				+            if resized_w > self.rec_image_shape[2]:
			
 
				+                resized_w = self.rec_image_shape[2]
			
 
				+            imgW = self.rec_image_shape[2]
			
 
				+        resized_image = cv2.resize(img, (resized_w, imgH))
			
 
				+        resized_image = resized_image.astype("float32")
			
 
				+        resized_image = resized_image.transpose((2, 0, 1)) / 255
			
 
				+        resized_image -= 0.5
			
 
				+        resized_image /= 0.5
			
 
				+        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
			
 
				+        padding_im[:, :, 0:resized_w] = resized_image
			
 
				+        return padding_im
			
 
				+
			
 
				+    def resize_norm_img_vl(self, img, image_shape):
			
 
				+
			
 
				+        imgC, imgH, imgW = image_shape
			
 
				+        img = img[:, :, ::-1]  # bgr2rgb
			
 
				+        resized_image = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
			
 
				+        resized_image = resized_image.astype("float32")
			
 
				+        resized_image = resized_image.transpose((2, 0, 1)) / 255
			
 
				+        return resized_image
			
 
				+
			
 
				+    def resize_norm_img_srn(self, img, image_shape):
			
 
				+        imgC, imgH, imgW = image_shape
			
 
				+
			
 
				+        img_black = np.zeros((imgH, imgW))
			
 
				+        im_hei = img.shape[0]
			
 
				+        im_wid = img.shape[1]
			
 
				+
			
 
				+        if im_wid <= im_hei * 1:
			
 
				+            img_new = cv2.resize(img, (imgH * 1, imgH))
			
 
				+        elif im_wid <= im_hei * 2:
			
 
				+            img_new = cv2.resize(img, (imgH * 2, imgH))
			
 
				+        elif im_wid <= im_hei * 3:
			
 
				+            img_new = cv2.resize(img, (imgH * 3, imgH))
			
 
				+        else:
			
 
				+            img_new = cv2.resize(img, (imgW, imgH))
			
 
				+
			
 
				+        img_np = np.asarray(img_new)
			
 
				+        img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
			
 
				+        img_black[:, 0 : img_np.shape[1]] = img_np
			
 
				+        img_black = img_black[:, :, np.newaxis]
			
 
				+
			
 
				+        row, col, c = img_black.shape
			
 
				+        c = 1
			
 
				+
			
 
				+        return np.reshape(img_black, (c, row, col)).astype(np.float32)
			
 
				+
			
 
				+    def srn_other_inputs(self, image_shape, num_heads, max_text_length):
			
 
				+
			
 
				+        imgC, imgH, imgW = image_shape
			
 
				+        feature_dim = int((imgH / 8) * (imgW / 8))
			
 
				+
			
 
				+        encoder_word_pos = (
			
 
				+            np.array(range(0, feature_dim)).reshape((feature_dim, 1)).astype("int64")
			
 
				+        )
			
 
				+        gsrm_word_pos = (
			
 
				+            np.array(range(0, max_text_length))
			
 
				+            .reshape((max_text_length, 1))
			
 
				+            .astype("int64")
			
 
				+        )
			
 
				+
			
 
				+        gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length))
			
 
				+        gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape(
			
 
				+            [-1, 1, max_text_length, max_text_length]
			
 
				+        )
			
 
				+        gsrm_slf_attn_bias1 = np.tile(gsrm_slf_attn_bias1, [1, num_heads, 1, 1]).astype(
			
 
				+            "float32"
			
 
				+        ) * [-1e9]
			
 
				+
			
 
				+        gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape(
			
 
				+            [-1, 1, max_text_length, max_text_length]
			
 
				+        )
			
 
				+        gsrm_slf_attn_bias2 = np.tile(gsrm_slf_attn_bias2, [1, num_heads, 1, 1]).astype(
			
 
				+            "float32"
			
 
				+        ) * [-1e9]
			
 
				+
			
 
				+        encoder_word_pos = encoder_word_pos[np.newaxis, :]
			
 
				+        gsrm_word_pos = gsrm_word_pos[np.newaxis, :]
			
 
				+
			
 
				+        return [
			
 
				+            encoder_word_pos,
			
 
				+            gsrm_word_pos,
			
 
				+            gsrm_slf_attn_bias1,
			
 
				+            gsrm_slf_attn_bias2,
			
 
				+        ]
			
 
				+
			
 
				+    def process_image_srn(self, img, image_shape, num_heads, max_text_length):
			
 
				+        norm_img = self.resize_norm_img_srn(img, image_shape)
			
 
				+        norm_img = norm_img[np.newaxis, :]
			
 
				+
			
 
				+        [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = (
			
 
				+            self.srn_other_inputs(image_shape, num_heads, max_text_length)
			
 
				+        )
			
 
				+
			
 
				+        gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32)
			
 
				+        gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32)
			
 
				+        encoder_word_pos = encoder_word_pos.astype(np.int64)
			
 
				+        gsrm_word_pos = gsrm_word_pos.astype(np.int64)
			
 
				+
			
 
				+        return (
			
 
				+            norm_img,
			
 
				+            encoder_word_pos,
			
 
				+            gsrm_word_pos,
			
 
				+            gsrm_slf_attn_bias1,
			
 
				+            gsrm_slf_attn_bias2,
			
 
				+        )
			
 
				+
			
 
				+    def resize_norm_img_sar(self, img, image_shape, width_downsample_ratio=0.25):
			
 
				+        imgC, imgH, imgW_min, imgW_max = image_shape
			
 
				+        h = img.shape[0]
			
 
				+        w = img.shape[1]
			
 
				+        valid_ratio = 1.0
			
 
				+        # make sure new_width is an integral multiple of width_divisor.
			
 
				+        width_divisor = int(1 / width_downsample_ratio)
			
 
				+        # resize
			
 
				+        ratio = w / float(h)
			
 
				+        resize_w = math.ceil(imgH * ratio)
			
 
				+        if resize_w % width_divisor != 0:
			
 
				+            resize_w = round(resize_w / width_divisor) * width_divisor
			
 
				+        if imgW_min is not None:
			
 
				+            resize_w = max(imgW_min, resize_w)
			
 
				+        if imgW_max is not None:
			
 
				+            valid_ratio = min(1.0, 1.0 * resize_w / imgW_max)
			
 
				+            resize_w = min(imgW_max, resize_w)
			
 
				+        resized_image = cv2.resize(img, (resize_w, imgH))
			
 
				+        resized_image = resized_image.astype("float32")
			
 
				+        # norm
			
 
				+        if image_shape[0] == 1:
			
 
				+            resized_image = resized_image / 255
			
 
				+            resized_image = resized_image[np.newaxis, :]
			
 
				+        else:
			
 
				+            resized_image = resized_image.transpose((2, 0, 1)) / 255
			
 
				+        resized_image -= 0.5
			
 
				+        resized_image /= 0.5
			
 
				+        resize_shape = resized_image.shape
			
 
				+        padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32)
			
 
				+        padding_im[:, :, 0:resize_w] = resized_image
			
 
				+        pad_shape = padding_im.shape
			
 
				+
			
 
				+        return padding_im, resize_shape, pad_shape, valid_ratio
			
 
				+
			
 
				+    def resize_norm_img_spin(self, img):
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+        # return padding_im
			
 
				+        img = cv2.resize(img, tuple([100, 32]), cv2.INTER_CUBIC)
			
 
				+        img = np.array(img, np.float32)
			
 
				+        img = np.expand_dims(img, -1)
			
 
				+        img = img.transpose((2, 0, 1))
			
 
				+        mean = [127.5]
			
 
				+        std = [127.5]
			
 
				+        mean = np.array(mean, dtype=np.float32)
			
 
				+        std = np.array(std, dtype=np.float32)
			
 
				+        mean = np.float32(mean.reshape(1, -1))
			
 
				+        stdinv = 1 / np.float32(std.reshape(1, -1))
			
 
				+        img -= mean
			
 
				+        img *= stdinv
			
 
				+        return img
			
 
				+
			
 
				+    def resize_norm_img_svtr(self, img, image_shape):
			
 
				+
			
 
				+        imgC, imgH, imgW = image_shape
			
 
				+        resized_image = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
			
 
				+        resized_image = resized_image.astype("float32")
			
 
				+        resized_image = resized_image.transpose((2, 0, 1)) / 255
			
 
				+        resized_image -= 0.5
			
 
				+        resized_image /= 0.5
			
 
				+        return resized_image
			
 
				+
			
 
				+    def resize_norm_img_abinet(self, img, image_shape):
			
 
				+
			
 
				+        imgC, imgH, imgW = image_shape
			
 
				+
			
 
				+        resized_image = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
			
 
				+        resized_image = resized_image.astype("float32")
			
 
				+        resized_image = resized_image / 255.0
			
 
				+
			
 
				+        mean = np.array([0.485, 0.456, 0.406])
			
 
				+        std = np.array([0.229, 0.224, 0.225])
			
 
				+        resized_image = (resized_image - mean[None, None, ...]) / std[None, None, ...]
			
 
				+        resized_image = resized_image.transpose((2, 0, 1))
			
 
				+        resized_image = resized_image.astype("float32")
			
 
				+
			
 
				+        return resized_image
			
 
				+
			
 
				+    def norm_img_can(self, img, image_shape):
			
 
				+
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # CAN only predict gray scale image
			
 
				+
			
 
				+        if self.inverse:
			
 
				+            img = 255 - img
			
 
				+
			
 
				+        if self.rec_image_shape[0] == 1:
			
 
				+            h, w = img.shape
			
 
				+            _, imgH, imgW = self.rec_image_shape
			
 
				+            if h < imgH or w < imgW:
			
 
				+                padding_h = max(imgH - h, 0)
			
 
				+                padding_w = max(imgW - w, 0)
			
 
				+                img_padded = np.pad(
			
 
				+                    img,
			
 
				+                    ((0, padding_h), (0, padding_w)),
			
 
				+                    "constant",
			
 
				+                    constant_values=(255),
			
 
				+                )
			
 
				+                img = img_padded
			
 
				+
			
 
				+        img = np.expand_dims(img, 0) / 255.0  # h,w,c -> c,h,w
			
 
				+        img = img.astype("float32")
			
 
				+
			
 
				+        return img
			
 
				+
			
 
				+    def __call__(self, img_list):
			
 
				+        img_num = len(img_list)
			
 
				+        # Calculate the aspect ratio of all text bars
			
 
				+        width_list = []
			
 
				+        for img in img_list:
			
 
				+            width_list.append(img.shape[1] / float(img.shape[0]))
			
 
				+        # Sorting can speed up the recognition process
			
 
				+        indices = np.argsort(np.array(width_list))
			
 
				+        rec_res = [["", 0.0]] * img_num
			
 
				+        batch_num = self.rec_batch_num
			
 
				+
			
 
				+        for beg_img_no in range(0, img_num, batch_num):
			
 
				+            end_img_no = min(img_num, beg_img_no + batch_num)
			
 
				+            norm_img_batch = []
			
 
				+            imgC, imgH, imgW = self.rec_image_shape[:3]
			
 
				+            max_wh_ratio = imgW / imgH
			
 
				+            # max_wh_ratio = 0
			
 
				+            for ino in range(beg_img_no, end_img_no):
			
 
				+                h, w = img_list[indices[ino]].shape[0:2]
			
 
				+                wh_ratio = w * 1.0 / h
			
 
				+                max_wh_ratio = max(max_wh_ratio, wh_ratio)
			
 
				+            for ino in range(beg_img_no, end_img_no):
			
 
				+                norm_img = self.resize_norm_img(img_list[indices[ino]], max_wh_ratio)
			
 
				+                norm_img = norm_img[np.newaxis, :]
			
 
				+                norm_img_batch.append(norm_img)
			
 
				+
			
 
				+            norm_img_batch = np.concatenate(norm_img_batch)
			
 
				+            norm_img_batch = norm_img_batch.copy()
			
 
				+
			
 
				+            # img = img[:, :, ::-1].transpose(2, 0, 1)
			
 
				+            # img = img[:, :, ::-1]
			
 
				+            # img = img.transpose(2, 0, 1)
			
 
				+            # img = img.astype(np.float32)
			
 
				+            # img = np.expand_dims(img, axis=0)
			
 
				+            # print(img.shape)
			
 
				+            input_feed = self.get_input_feed(self.rec_input_name, norm_img_batch)
			
 
				+            outputs = self.rec_onnx_session.run(
			
 
				+                self.rec_output_name, input_feed=input_feed
			
 
				+            )
			
 
				+
			
 
				+            preds = outputs[0]
			
 
				+
			
 
				+            rec_result = self.postprocess_op(preds)
			
 
				+            for rno in range(len(rec_result)):
			
 
				+                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
			
 
				+
			
 
				+        return rec_res
			
--- a/python/onnxocr/onnxocr/predict_system.py
+++ b/python/onnxocr/onnxocr/predict_system.py
@@ -0,0 +1,97 @@
 
				+import os
			
 
				+import cv2
			
 
				+import copy
			
 
				+from . import predict_det
			
 
				+from . import predict_cls
			
 
				+from . import predict_rec
			
 
				+from .utils import get_rotate_crop_image, get_minarea_rect_crop
			
 
				+
			
 
				+
			
 
				+class TextSystem(object):
			
 
				+    def __init__(self, args):
			
 
				+        self.text_detector = predict_det.TextDetector(args)
			
 
				+        self.text_recognizer = predict_rec.TextRecognizer(args)
			
 
				+        self.use_angle_cls = args.use_angle_cls
			
 
				+        self.drop_score = args.drop_score
			
 
				+        if self.use_angle_cls:
			
 
				+            self.text_classifier = predict_cls.TextClassifier(args)
			
 
				+
			
 
				+        self.args = args
			
 
				+        self.crop_image_res_index = 0
			
 
				+
			
 
				+    def draw_crop_rec_res(self, output_dir, img_crop_list, rec_res):
			
 
				+        os.makedirs(output_dir, exist_ok=True)
			
 
				+        bbox_num = len(img_crop_list)
			
 
				+        for bno in range(bbox_num):
			
 
				+            cv2.imwrite(
			
 
				+                os.path.join(
			
 
				+                    output_dir, f"mg_crop_{bno+self.crop_image_res_index}.jpg"
			
 
				+                ),
			
 
				+                img_crop_list[bno],
			
 
				+            )
			
 
				+
			
 
				+        self.crop_image_res_index += bbox_num
			
 
				+
			
 
				+    def __call__(self, img, cls=True):
			
 
				+        ori_im = img.copy()
			
 
				+        # 文字检测
			
 
				+        dt_boxes = self.text_detector(img)
			
 
				+
			
 
				+        if dt_boxes is None:
			
 
				+            return None, None
			
 
				+
			
 
				+        img_crop_list = []
			
 
				+
			
 
				+        dt_boxes = sorted_boxes(dt_boxes)
			
 
				+
			
 
				+        # 图片裁剪
			
 
				+        for bno in range(len(dt_boxes)):
			
 
				+            tmp_box = copy.deepcopy(dt_boxes[bno])
			
 
				+            if self.args.det_box_type == "quad":
			
 
				+                img_crop = get_rotate_crop_image(ori_im, tmp_box)
			
 
				+            else:
			
 
				+                img_crop = get_minarea_rect_crop(ori_im, tmp_box)
			
 
				+            img_crop_list.append(img_crop)
			
 
				+
			
 
				+        # 方向分类
			
 
				+        if self.use_angle_cls and cls:
			
 
				+            img_crop_list, angle_list = self.text_classifier(img_crop_list)
			
 
				+
			
 
				+        # 图像识别
			
 
				+        rec_res = self.text_recognizer(img_crop_list)
			
 
				+
			
 
				+        if self.args.save_crop_res:
			
 
				+            self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list, rec_res)
			
 
				+        filter_boxes, filter_rec_res = [], []
			
 
				+        for box, rec_result in zip(dt_boxes, rec_res):
			
 
				+            text, score = rec_result
			
 
				+            if score >= self.drop_score:
			
 
				+                filter_boxes.append(box)
			
 
				+                filter_rec_res.append(rec_result)
			
 
				+
			
 
				+        return filter_boxes, filter_rec_res
			
 
				+
			
 
				+
			
 
				+def sorted_boxes(dt_boxes):
			
 
				+    """
			
 
				+    Sort text boxes in order from top to bottom, left to right
			
 
				+    args:
			
 
				+        dt_boxes(array):detected text boxes with shape [4, 2]
			
 
				+    return:
			
 
				+        sorted boxes(array) with shape [4, 2]
			
 
				+    """
			
 
				+    num_boxes = dt_boxes.shape[0]
			
 
				+    sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
			
 
				+    _boxes = list(sorted_boxes)
			
 
				+
			
 
				+    for i in range(num_boxes - 1):
			
 
				+        for j in range(i, -1, -1):
			
 
				+            if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and (
			
 
				+                _boxes[j + 1][0][0] < _boxes[j][0][0]
			
 
				+            ):
			
 
				+                tmp = _boxes[j]
			
 
				+                _boxes[j] = _boxes[j + 1]
			
 
				+                _boxes[j + 1] = tmp
			
 
				+            else:
			
 
				+                break
			
 
				+    return _boxes
			
--- a/python/onnxocr/onnxocr/readme.md
+++ b/python/onnxocr/onnxocr/readme.md
@@ -0,0 +1,65 @@
 
				+# paddleocr模型转换成onnx模型后，利用ONNX模型进行推理
			
 
				+## 1、安装paddle2onnx
			
 
				+```angular2html
			
 
				+pip install paddle2onnx
			
 
				+```
			
 
				+
			
 
				+## 2、下载paddleocr模型文件
			
 
				+```angular2html
			
 
				+!wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
			
 
				+!wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
			
 
				+!wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
			
 
				+```
			
 
				+## 3、解压模型文件
			
 
				+```angular2html
			
 
				+!tar -xvf /home/aistudio/onnx_pred/models/ch_ppocr_mobile_v2.0_cls_infer.tar
			
 
				+!tar -xvf /home/aistudio/onnx_pred/models/ch_ppocr_server_v2.0_det_infer.tar
			
 
				+!tar -xvf /home/aistudio/onnx_pred/models/ch_ppocr_server_v2.0_rec_infer.tar
			
 
				+```
			
 
				+
			
 
				+## 4、将paddleocr模型转成onnx模型
			
 
				+```angular2html
			
 
				+paddle2onnx --model_dir ./ch_ppocr_server_v2.0_rec_infer \
			
 
				+--model_filename inference.pdmodel \
			
 
				+--params_filename inference.pdiparams \
			
 
				+--save_file ./ch_ppocr_server_v2.0_rec.onnx \
			
 
				+--opset_version 11 \
			
 
				+--enable_onnx_checker True
			
 
				+
			
 
				+
			
 
				+paddle2onnx --model_dir ./ch_ppocr_server_v2.0_det_infer \
			
 
				+--model_filename inference.pdmodel \
			
 
				+--params_filename inference.pdiparams \
			
 
				+--save_file ./ch_ppocr_server_v2.0_det.onnx \
			
 
				+--opset_version 11 \
			
 
				+--enable_onnx_checker True
			
 
				+
			
 
				+
			
 
				+paddle2onnx --model_dir ./ch_ppocr_mobile_v2.0_cls_infer \
			
 
				+--model_filename inference.pdmodel \
			
 
				+--params_filename inference.pdiparams \
			
 
				+--save_file ./ch_ppocr_mobile_v2.0_cls.onnx \
			
 
				+--opset_version 11 \
			
 
				+--enable_onnx_checker True
			
 
				+```
			
 
				+
			
 
				+## 5、安装onnx
			
 
				+```angular2html
			
 
				+pip install onnx==1.14.0
			
 
				+pip install onnxruntime-gpu==1.14.1
			
 
				+```
			
 
				+
			
 
				+## 6、模型推理
			
 
				+```angular2html
			
 
				+    import cv2
			
 
				+    model = ONNXPaddleOcr()
			
 
				+
			
 
				+    img = cv2.imread('./1.jpg')
			
 
				+
			
 
				+    # ocr识别结果
			
 
				+    result = model.ocr(img)
			
 
				+    print(result)
			
 
				+    
			
 
				+    # 画box框
			
 
				+    sav2Img(img, result)
			
 
				+```
			
--- a/python/onnxocr/onnxocr/rec_postprocess.py
+++ b/python/onnxocr/onnxocr/rec_postprocess.py
@@ -0,0 +1,898 @@
 
				+import numpy as np
			
 
				+
			
 
				+# import paddle
			
 
				+paddle = None
			
 
				+# from paddle.nn import functional as F
			
 
				+import re
			
 
				+
			
 
				+
			
 
				+class BaseRecLabelDecode(object):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False):
			
 
				+        self.beg_str = "sos"
			
 
				+        self.end_str = "eos"
			
 
				+        self.reverse = False
			
 
				+        self.character_str = []
			
 
				+
			
 
				+        if character_dict_path is None:
			
 
				+            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
			
 
				+            dict_character = list(self.character_str)
			
 
				+        else:
			
 
				+            with open(character_dict_path, "rb") as fin:
			
 
				+                lines = fin.readlines()
			
 
				+                for line in lines:
			
 
				+                    line = line.decode("utf-8").strip("\n").strip("\r\n")
			
 
				+                    self.character_str.append(line)
			
 
				+            if use_space_char:
			
 
				+                self.character_str.append(" ")
			
 
				+            dict_character = list(self.character_str)
			
 
				+            if "arabic" in character_dict_path:
			
 
				+                self.reverse = True
			
 
				+
			
 
				+        dict_character = self.add_special_char(dict_character)
			
 
				+        self.dict = {}
			
 
				+        for i, char in enumerate(dict_character):
			
 
				+            self.dict[char] = i
			
 
				+        self.character = dict_character
			
 
				+
			
 
				+    def pred_reverse(self, pred):
			
 
				+        pred_re = []
			
 
				+        c_current = ""
			
 
				+        for c in pred:
			
 
				+            if not bool(re.search("[a-zA-Z0-9 :*./%+-]", c)):
			
 
				+                if c_current != "":
			
 
				+                    pred_re.append(c_current)
			
 
				+                pred_re.append(c)
			
 
				+                c_current = ""
			
 
				+            else:
			
 
				+                c_current += c
			
 
				+        if c_current != "":
			
 
				+            pred_re.append(c_current)
			
 
				+
			
 
				+        return "".join(pred_re[::-1])
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        return dict_character
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        ignored_tokens = self.get_ignored_tokens()
			
 
				+        batch_size = len(text_index)
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            selection = np.ones(len(text_index[batch_idx]), dtype=bool)
			
 
				+            if is_remove_duplicate:
			
 
				+                selection[1:] = text_index[batch_idx][1:] != text_index[batch_idx][:-1]
			
 
				+            for ignored_token in ignored_tokens:
			
 
				+                selection &= text_index[batch_idx] != ignored_token
			
 
				+
			
 
				+            char_list = [
			
 
				+                self.character[text_id] for text_id in text_index[batch_idx][selection]
			
 
				+            ]
			
 
				+            if text_prob is not None:
			
 
				+                conf_list = text_prob[batch_idx][selection]
			
 
				+            else:
			
 
				+                conf_list = [1] * len(selection)
			
 
				+            if len(conf_list) == 0:
			
 
				+                conf_list = [0]
			
 
				+
			
 
				+            text = "".join(char_list)
			
 
				+
			
 
				+            if self.reverse:  # for arabic rec
			
 
				+                text = self.pred_reverse(text)
			
 
				+
			
 
				+            result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+        return result_list
			
 
				+
			
 
				+    def get_ignored_tokens(self):
			
 
				+        return [0]  # for ctc blank
			
 
				+
			
 
				+
			
 
				+class CTCLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(CTCLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        if isinstance(preds, tuple) or isinstance(preds, list):
			
 
				+            preds = preds[-1]
			
 
				+        # if isinstance(preds, paddle.Tensor):
			
 
				+        #     preds = preds.numpy()
			
 
				+        preds_idx = preds.argmax(axis=2)
			
 
				+        preds_prob = preds.max(axis=2)
			
 
				+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label)
			
 
				+        return text, label
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        dict_character = ["blank"] + dict_character
			
 
				+        return dict_character
			
 
				+
			
 
				+
			
 
				+class DistillationCTCLabelDecode(CTCLabelDecode):
			
 
				+    """
			
 
				+    Convert
			
 
				+    Convert between text-label and text-index
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        character_dict_path=None,
			
 
				+        use_space_char=False,
			
 
				+        model_name=["student"],
			
 
				+        key=None,
			
 
				+        multi_head=False,
			
 
				+        **kwargs
			
 
				+    ):
			
 
				+        super(DistillationCTCLabelDecode, self).__init__(
			
 
				+            character_dict_path, use_space_char
			
 
				+        )
			
 
				+        if not isinstance(model_name, list):
			
 
				+            model_name = [model_name]
			
 
				+        self.model_name = model_name
			
 
				+
			
 
				+        self.key = key
			
 
				+        self.multi_head = multi_head
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        output = dict()
			
 
				+        for name in self.model_name:
			
 
				+            pred = preds[name]
			
 
				+            if self.key is not None:
			
 
				+                pred = pred[self.key]
			
 
				+            if self.multi_head and isinstance(pred, dict):
			
 
				+                pred = pred["ctc"]
			
 
				+            output[name] = super().__call__(pred, label=label, *args, **kwargs)
			
 
				+        return output
			
 
				+
			
 
				+
			
 
				+class AttnLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(AttnLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        self.beg_str = "sos"
			
 
				+        self.end_str = "eos"
			
 
				+        dict_character = dict_character
			
 
				+        dict_character = [self.beg_str] + dict_character + [self.end_str]
			
 
				+        return dict_character
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        ignored_tokens = self.get_ignored_tokens()
			
 
				+        [beg_idx, end_idx] = self.get_ignored_tokens()
			
 
				+        batch_size = len(text_index)
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            char_list = []
			
 
				+            conf_list = []
			
 
				+            for idx in range(len(text_index[batch_idx])):
			
 
				+                if text_index[batch_idx][idx] in ignored_tokens:
			
 
				+                    continue
			
 
				+                if int(text_index[batch_idx][idx]) == int(end_idx):
			
 
				+                    break
			
 
				+                if is_remove_duplicate:
			
 
				+                    # only for predict
			
 
				+                    if (
			
 
				+                        idx > 0
			
 
				+                        and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]
			
 
				+                    ):
			
 
				+                        continue
			
 
				+                char_list.append(self.character[int(text_index[batch_idx][idx])])
			
 
				+                if text_prob is not None:
			
 
				+                    conf_list.append(text_prob[batch_idx][idx])
			
 
				+                else:
			
 
				+                    conf_list.append(1)
			
 
				+            text = "".join(char_list)
			
 
				+            result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+        return result_list
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        """
			
 
				+        text = self.decode(text)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        else:
			
 
				+            label = self.decode(label, is_remove_duplicate=False)
			
 
				+            return text, label
			
 
				+        """
			
 
				+        if isinstance(preds, paddle.Tensor):
			
 
				+            preds = preds.numpy()
			
 
				+
			
 
				+        preds_idx = preds.argmax(axis=2)
			
 
				+        preds_prob = preds.max(axis=2)
			
 
				+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label, is_remove_duplicate=False)
			
 
				+        return text, label
			
 
				+
			
 
				+    def get_ignored_tokens(self):
			
 
				+        beg_idx = self.get_beg_end_flag_idx("beg")
			
 
				+        end_idx = self.get_beg_end_flag_idx("end")
			
 
				+        return [beg_idx, end_idx]
			
 
				+
			
 
				+    def get_beg_end_flag_idx(self, beg_or_end):
			
 
				+        if beg_or_end == "beg":
			
 
				+            idx = np.array(self.dict[self.beg_str])
			
 
				+        elif beg_or_end == "end":
			
 
				+            idx = np.array(self.dict[self.end_str])
			
 
				+        else:
			
 
				+            assert False, "unsupport type %s in get_beg_end_flag_idx" % beg_or_end
			
 
				+        return idx
			
 
				+
			
 
				+
			
 
				+class RFLLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(RFLLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        self.beg_str = "sos"
			
 
				+        self.end_str = "eos"
			
 
				+        dict_character = dict_character
			
 
				+        dict_character = [self.beg_str] + dict_character + [self.end_str]
			
 
				+        return dict_character
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        ignored_tokens = self.get_ignored_tokens()
			
 
				+        [beg_idx, end_idx] = self.get_ignored_tokens()
			
 
				+        batch_size = len(text_index)
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            char_list = []
			
 
				+            conf_list = []
			
 
				+            for idx in range(len(text_index[batch_idx])):
			
 
				+                if text_index[batch_idx][idx] in ignored_tokens:
			
 
				+                    continue
			
 
				+                if int(text_index[batch_idx][idx]) == int(end_idx):
			
 
				+                    break
			
 
				+                if is_remove_duplicate:
			
 
				+                    # only for predict
			
 
				+                    if (
			
 
				+                        idx > 0
			
 
				+                        and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]
			
 
				+                    ):
			
 
				+                        continue
			
 
				+                char_list.append(self.character[int(text_index[batch_idx][idx])])
			
 
				+                if text_prob is not None:
			
 
				+                    conf_list.append(text_prob[batch_idx][idx])
			
 
				+                else:
			
 
				+                    conf_list.append(1)
			
 
				+            text = "".join(char_list)
			
 
				+            result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+        return result_list
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        # if seq_outputs is not None:
			
 
				+        if isinstance(preds, tuple) or isinstance(preds, list):
			
 
				+            cnt_outputs, seq_outputs = preds
			
 
				+            if isinstance(seq_outputs, paddle.Tensor):
			
 
				+                seq_outputs = seq_outputs.numpy()
			
 
				+            preds_idx = seq_outputs.argmax(axis=2)
			
 
				+            preds_prob = seq_outputs.max(axis=2)
			
 
				+            text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+
			
 
				+            if label is None:
			
 
				+                return text
			
 
				+            label = self.decode(label, is_remove_duplicate=False)
			
 
				+            return text, label
			
 
				+
			
 
				+        else:
			
 
				+            cnt_outputs = preds
			
 
				+            if isinstance(cnt_outputs, paddle.Tensor):
			
 
				+                cnt_outputs = cnt_outputs.numpy()
			
 
				+            cnt_length = []
			
 
				+            for lens in cnt_outputs:
			
 
				+                length = round(np.sum(lens))
			
 
				+                cnt_length.append(length)
			
 
				+            if label is None:
			
 
				+                return cnt_length
			
 
				+            label = self.decode(label, is_remove_duplicate=False)
			
 
				+            length = [len(res[0]) for res in label]
			
 
				+            return cnt_length, length
			
 
				+
			
 
				+    def get_ignored_tokens(self):
			
 
				+        beg_idx = self.get_beg_end_flag_idx("beg")
			
 
				+        end_idx = self.get_beg_end_flag_idx("end")
			
 
				+        return [beg_idx, end_idx]
			
 
				+
			
 
				+    def get_beg_end_flag_idx(self, beg_or_end):
			
 
				+        if beg_or_end == "beg":
			
 
				+            idx = np.array(self.dict[self.beg_str])
			
 
				+        elif beg_or_end == "end":
			
 
				+            idx = np.array(self.dict[self.end_str])
			
 
				+        else:
			
 
				+            assert False, "unsupport type %s in get_beg_end_flag_idx" % beg_or_end
			
 
				+        return idx
			
 
				+
			
 
				+
			
 
				+class SEEDLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(SEEDLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        self.padding_str = "padding"
			
 
				+        self.end_str = "eos"
			
 
				+        self.unknown = "unknown"
			
 
				+        dict_character = dict_character + [self.end_str, self.padding_str, self.unknown]
			
 
				+        return dict_character
			
 
				+
			
 
				+    def get_ignored_tokens(self):
			
 
				+        end_idx = self.get_beg_end_flag_idx("eos")
			
 
				+        return [end_idx]
			
 
				+
			
 
				+    def get_beg_end_flag_idx(self, beg_or_end):
			
 
				+        if beg_or_end == "sos":
			
 
				+            idx = np.array(self.dict[self.beg_str])
			
 
				+        elif beg_or_end == "eos":
			
 
				+            idx = np.array(self.dict[self.end_str])
			
 
				+        else:
			
 
				+            assert False, "unsupport type %s in get_beg_end_flag_idx" % beg_or_end
			
 
				+        return idx
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        [end_idx] = self.get_ignored_tokens()
			
 
				+        batch_size = len(text_index)
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            char_list = []
			
 
				+            conf_list = []
			
 
				+            for idx in range(len(text_index[batch_idx])):
			
 
				+                if int(text_index[batch_idx][idx]) == int(end_idx):
			
 
				+                    break
			
 
				+                if is_remove_duplicate:
			
 
				+                    # only for predict
			
 
				+                    if (
			
 
				+                        idx > 0
			
 
				+                        and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]
			
 
				+                    ):
			
 
				+                        continue
			
 
				+                char_list.append(self.character[int(text_index[batch_idx][idx])])
			
 
				+                if text_prob is not None:
			
 
				+                    conf_list.append(text_prob[batch_idx][idx])
			
 
				+                else:
			
 
				+                    conf_list.append(1)
			
 
				+            text = "".join(char_list)
			
 
				+            result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+        return result_list
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        """
			
 
				+        text = self.decode(text)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        else:
			
 
				+            label = self.decode(label, is_remove_duplicate=False)
			
 
				+            return text, label
			
 
				+        """
			
 
				+        preds_idx = preds["rec_pred"]
			
 
				+        if isinstance(preds_idx, paddle.Tensor):
			
 
				+            preds_idx = preds_idx.numpy()
			
 
				+        if "rec_pred_scores" in preds:
			
 
				+            preds_idx = preds["rec_pred"]
			
 
				+            preds_prob = preds["rec_pred_scores"]
			
 
				+        else:
			
 
				+            preds_idx = preds["rec_pred"].argmax(axis=2)
			
 
				+            preds_prob = preds["rec_pred"].max(axis=2)
			
 
				+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label, is_remove_duplicate=False)
			
 
				+        return text, label
			
 
				+
			
 
				+
			
 
				+class SRNLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(SRNLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+        self.max_text_length = kwargs.get("max_text_length", 25)
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        pred = preds["predict"]
			
 
				+        char_num = len(self.character_str) + 2
			
 
				+        if isinstance(pred, paddle.Tensor):
			
 
				+            pred = pred.numpy()
			
 
				+        pred = np.reshape(pred, [-1, char_num])
			
 
				+
			
 
				+        preds_idx = np.argmax(pred, axis=1)
			
 
				+        preds_prob = np.max(pred, axis=1)
			
 
				+
			
 
				+        preds_idx = np.reshape(preds_idx, [-1, self.max_text_length])
			
 
				+
			
 
				+        preds_prob = np.reshape(preds_prob, [-1, self.max_text_length])
			
 
				+
			
 
				+        text = self.decode(preds_idx, preds_prob)
			
 
				+
			
 
				+        if label is None:
			
 
				+            text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+            return text
			
 
				+        label = self.decode(label)
			
 
				+        return text, label
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        ignored_tokens = self.get_ignored_tokens()
			
 
				+        batch_size = len(text_index)
			
 
				+
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            char_list = []
			
 
				+            conf_list = []
			
 
				+            for idx in range(len(text_index[batch_idx])):
			
 
				+                if text_index[batch_idx][idx] in ignored_tokens:
			
 
				+                    continue
			
 
				+                if is_remove_duplicate:
			
 
				+                    # only for predict
			
 
				+                    if (
			
 
				+                        idx > 0
			
 
				+                        and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]
			
 
				+                    ):
			
 
				+                        continue
			
 
				+                char_list.append(self.character[int(text_index[batch_idx][idx])])
			
 
				+                if text_prob is not None:
			
 
				+                    conf_list.append(text_prob[batch_idx][idx])
			
 
				+                else:
			
 
				+                    conf_list.append(1)
			
 
				+
			
 
				+            text = "".join(char_list)
			
 
				+            result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+        return result_list
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        dict_character = dict_character + [self.beg_str, self.end_str]
			
 
				+        return dict_character
			
 
				+
			
 
				+    def get_ignored_tokens(self):
			
 
				+        beg_idx = self.get_beg_end_flag_idx("beg")
			
 
				+        end_idx = self.get_beg_end_flag_idx("end")
			
 
				+        return [beg_idx, end_idx]
			
 
				+
			
 
				+    def get_beg_end_flag_idx(self, beg_or_end):
			
 
				+        if beg_or_end == "beg":
			
 
				+            idx = np.array(self.dict[self.beg_str])
			
 
				+        elif beg_or_end == "end":
			
 
				+            idx = np.array(self.dict[self.end_str])
			
 
				+        else:
			
 
				+            assert False, "unsupport type %s in get_beg_end_flag_idx" % beg_or_end
			
 
				+        return idx
			
 
				+
			
 
				+
			
 
				+class SARLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(SARLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+        self.rm_symbol = kwargs.get("rm_symbol", False)
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        beg_end_str = "<BOS/EOS>"
			
 
				+        unknown_str = "<UKN>"
			
 
				+        padding_str = "<PAD>"
			
 
				+        dict_character = dict_character + [unknown_str]
			
 
				+        self.unknown_idx = len(dict_character) - 1
			
 
				+        dict_character = dict_character + [beg_end_str]
			
 
				+        self.start_idx = len(dict_character) - 1
			
 
				+        self.end_idx = len(dict_character) - 1
			
 
				+        dict_character = dict_character + [padding_str]
			
 
				+        self.padding_idx = len(dict_character) - 1
			
 
				+        return dict_character
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        ignored_tokens = self.get_ignored_tokens()
			
 
				+
			
 
				+        batch_size = len(text_index)
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            char_list = []
			
 
				+            conf_list = []
			
 
				+            for idx in range(len(text_index[batch_idx])):
			
 
				+                if text_index[batch_idx][idx] in ignored_tokens:
			
 
				+                    continue
			
 
				+                if int(text_index[batch_idx][idx]) == int(self.end_idx):
			
 
				+                    if text_prob is None and idx == 0:
			
 
				+                        continue
			
 
				+                    else:
			
 
				+                        break
			
 
				+                if is_remove_duplicate:
			
 
				+                    # only for predict
			
 
				+                    if (
			
 
				+                        idx > 0
			
 
				+                        and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]
			
 
				+                    ):
			
 
				+                        continue
			
 
				+                char_list.append(self.character[int(text_index[batch_idx][idx])])
			
 
				+                if text_prob is not None:
			
 
				+                    conf_list.append(text_prob[batch_idx][idx])
			
 
				+                else:
			
 
				+                    conf_list.append(1)
			
 
				+            text = "".join(char_list)
			
 
				+            if self.rm_symbol:
			
 
				+                comp = re.compile("[^A-Z^a-z^0-9^\u4e00-\u9fa5]")
			
 
				+                text = text.lower()
			
 
				+                text = comp.sub("", text)
			
 
				+            result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+        return result_list
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        if isinstance(preds, paddle.Tensor):
			
 
				+            preds = preds.numpy()
			
 
				+        preds_idx = preds.argmax(axis=2)
			
 
				+        preds_prob = preds.max(axis=2)
			
 
				+
			
 
				+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label, is_remove_duplicate=False)
			
 
				+        return text, label
			
 
				+
			
 
				+    def get_ignored_tokens(self):
			
 
				+        return [self.padding_idx]
			
 
				+
			
 
				+
			
 
				+class DistillationSARLabelDecode(SARLabelDecode):
			
 
				+    """
			
 
				+    Convert
			
 
				+    Convert between text-label and text-index
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        character_dict_path=None,
			
 
				+        use_space_char=False,
			
 
				+        model_name=["student"],
			
 
				+        key=None,
			
 
				+        multi_head=False,
			
 
				+        **kwargs
			
 
				+    ):
			
 
				+        super(DistillationSARLabelDecode, self).__init__(
			
 
				+            character_dict_path, use_space_char
			
 
				+        )
			
 
				+        if not isinstance(model_name, list):
			
 
				+            model_name = [model_name]
			
 
				+        self.model_name = model_name
			
 
				+
			
 
				+        self.key = key
			
 
				+        self.multi_head = multi_head
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        output = dict()
			
 
				+        for name in self.model_name:
			
 
				+            pred = preds[name]
			
 
				+            if self.key is not None:
			
 
				+                pred = pred[self.key]
			
 
				+            if self.multi_head and isinstance(pred, dict):
			
 
				+                pred = pred["sar"]
			
 
				+            output[name] = super().__call__(pred, label=label, *args, **kwargs)
			
 
				+        return output
			
 
				+
			
 
				+
			
 
				+class PRENLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(PRENLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        padding_str = "<PAD>"  # 0
			
 
				+        end_str = "<EOS>"  # 1
			
 
				+        unknown_str = "<UNK>"  # 2
			
 
				+
			
 
				+        dict_character = [padding_str, end_str, unknown_str] + dict_character
			
 
				+        self.padding_idx = 0
			
 
				+        self.end_idx = 1
			
 
				+        self.unknown_idx = 2
			
 
				+
			
 
				+        return dict_character
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        batch_size = len(text_index)
			
 
				+
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            char_list = []
			
 
				+            conf_list = []
			
 
				+            for idx in range(len(text_index[batch_idx])):
			
 
				+                if text_index[batch_idx][idx] == self.end_idx:
			
 
				+                    break
			
 
				+                if text_index[batch_idx][idx] in [self.padding_idx, self.unknown_idx]:
			
 
				+                    continue
			
 
				+                char_list.append(self.character[int(text_index[batch_idx][idx])])
			
 
				+                if text_prob is not None:
			
 
				+                    conf_list.append(text_prob[batch_idx][idx])
			
 
				+                else:
			
 
				+                    conf_list.append(1)
			
 
				+
			
 
				+            text = "".join(char_list)
			
 
				+            if len(text) > 0:
			
 
				+                result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+            else:
			
 
				+                # here confidence of empty recog result is 1
			
 
				+                result_list.append(("", 1))
			
 
				+        return result_list
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        if isinstance(preds, paddle.Tensor):
			
 
				+            preds = preds.numpy()
			
 
				+        preds_idx = preds.argmax(axis=2)
			
 
				+        preds_prob = preds.max(axis=2)
			
 
				+        text = self.decode(preds_idx, preds_prob)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label)
			
 
				+        return text, label
			
 
				+
			
 
				+
			
 
				+class NRTRLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=True, **kwargs):
			
 
				+        super(NRTRLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+
			
 
				+        if len(preds) == 2:
			
 
				+            preds_id = preds[0]
			
 
				+            preds_prob = preds[1]
			
 
				+            if isinstance(preds_id, paddle.Tensor):
			
 
				+                preds_id = preds_id.numpy()
			
 
				+            if isinstance(preds_prob, paddle.Tensor):
			
 
				+                preds_prob = preds_prob.numpy()
			
 
				+            if preds_id[0][0] == 2:
			
 
				+                preds_idx = preds_id[:, 1:]
			
 
				+                preds_prob = preds_prob[:, 1:]
			
 
				+            else:
			
 
				+                preds_idx = preds_id
			
 
				+            text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+            if label is None:
			
 
				+                return text
			
 
				+            label = self.decode(label[:, 1:])
			
 
				+        else:
			
 
				+            if isinstance(preds, paddle.Tensor):
			
 
				+                preds = preds.numpy()
			
 
				+            preds_idx = preds.argmax(axis=2)
			
 
				+            preds_prob = preds.max(axis=2)
			
 
				+            text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+            if label is None:
			
 
				+                return text
			
 
				+            label = self.decode(label[:, 1:])
			
 
				+        return text, label
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        dict_character = ["blank", "<unk>", "<s>", "</s>"] + dict_character
			
 
				+        return dict_character
			
 
				+
			
 
				+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+        """convert text-index into text-label."""
			
 
				+        result_list = []
			
 
				+        batch_size = len(text_index)
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            char_list = []
			
 
				+            conf_list = []
			
 
				+            for idx in range(len(text_index[batch_idx])):
			
 
				+                try:
			
 
				+                    char_idx = self.character[int(text_index[batch_idx][idx])]
			
 
				+                except:
			
 
				+                    continue
			
 
				+                if char_idx == "</s>":  # end
			
 
				+                    break
			
 
				+                char_list.append(char_idx)
			
 
				+                if text_prob is not None:
			
 
				+                    conf_list.append(text_prob[batch_idx][idx])
			
 
				+                else:
			
 
				+                    conf_list.append(1)
			
 
				+            text = "".join(char_list)
			
 
				+            result_list.append((text.lower(), np.mean(conf_list).tolist()))
			
 
				+        return result_list
			
 
				+
			
 
				+
			
 
				+class ViTSTRLabelDecode(NRTRLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(ViTSTRLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        if isinstance(preds, paddle.Tensor):
			
 
				+            preds = preds[:, 1:].numpy()
			
 
				+        else:
			
 
				+            preds = preds[:, 1:]
			
 
				+        preds_idx = preds.argmax(axis=2)
			
 
				+        preds_prob = preds.max(axis=2)
			
 
				+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label[:, 1:])
			
 
				+        return text, label
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        dict_character = ["<s>", "</s>"] + dict_character
			
 
				+        return dict_character
			
 
				+
			
 
				+
			
 
				+class ABINetLabelDecode(NRTRLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(ABINetLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        if isinstance(preds, dict):
			
 
				+            preds = preds["align"][-1].numpy()
			
 
				+        elif isinstance(preds, paddle.Tensor):
			
 
				+            preds = preds.numpy()
			
 
				+        else:
			
 
				+            preds = preds
			
 
				+
			
 
				+        preds_idx = preds.argmax(axis=2)
			
 
				+        preds_prob = preds.max(axis=2)
			
 
				+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label)
			
 
				+        return text, label
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        dict_character = ["</s>"] + dict_character
			
 
				+        return dict_character
			
 
				+
			
 
				+
			
 
				+class SPINLabelDecode(AttnLabelDecode):
			
 
				+    """Convert between text-label and text-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(SPINLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def add_special_char(self, dict_character):
			
 
				+        self.beg_str = "sos"
			
 
				+        self.end_str = "eos"
			
 
				+        dict_character = dict_character
			
 
				+        dict_character = [self.beg_str] + [self.end_str] + dict_character
			
 
				+        return dict_character
			
 
				+
			
 
				+
			
 
				+# class VLLabelDecode(BaseRecLabelDecode):
			
 
				+#     """ Convert between text-label and text-index """
			
 
				+#
			
 
				+#     def __init__(self, character_dict_path=None, use_space_char=False,
			
 
				+#                  **kwargs):
			
 
				+#         super(VLLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+#         self.max_text_length = kwargs.get('max_text_length', 25)
			
 
				+#         self.nclass = len(self.character) + 1
			
 
				+#
			
 
				+#     def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
			
 
				+#         """ convert text-index into text-label. """
			
 
				+#         result_list = []
			
 
				+#         ignored_tokens = self.get_ignored_tokens()
			
 
				+#         batch_size = len(text_index)
			
 
				+#         for batch_idx in range(batch_size):
			
 
				+#             selection = np.ones(len(text_index[batch_idx]), dtype=bool)
			
 
				+#             if is_remove_duplicate:
			
 
				+#                 selection[1:] = text_index[batch_idx][1:] != text_index[
			
 
				+#                     batch_idx][:-1]
			
 
				+#             for ignored_token in ignored_tokens:
			
 
				+#                 selection &= text_index[batch_idx] != ignored_token
			
 
				+#
			
 
				+#             char_list = [
			
 
				+#                 self.character[text_id - 1]
			
 
				+#                 for text_id in text_index[batch_idx][selection]
			
 
				+#             ]
			
 
				+#             if text_prob is not None:
			
 
				+#                 conf_list = text_prob[batch_idx][selection]
			
 
				+#             else:
			
 
				+#                 conf_list = [1] * len(selection)
			
 
				+#             if len(conf_list) == 0:
			
 
				+#                 conf_list = [0]
			
 
				+#
			
 
				+#             text = ''.join(char_list)
			
 
				+#             result_list.append((text, np.mean(conf_list).tolist()))
			
 
				+#         return result_list
			
 
				+#
			
 
				+#     def __call__(self, preds, label=None, length=None, *args, **kwargs):
			
 
				+#         if len(preds) == 2:  # eval mode
			
 
				+#             text_pre, x = preds
			
 
				+#             b = text_pre.shape[1]
			
 
				+#             lenText = self.max_text_length
			
 
				+#             nsteps = self.max_text_length
			
 
				+#
			
 
				+#             if not isinstance(text_pre, paddle.Tensor):
			
 
				+#                 text_pre = paddle.to_tensor(text_pre, dtype='float32')
			
 
				+#
			
 
				+#             out_res = paddle.zeros(
			
 
				+#                 shape=[lenText, b, self.nclass], dtype=x.dtype)
			
 
				+#             out_length = paddle.zeros(shape=[b], dtype=x.dtype)
			
 
				+#             now_step = 0
			
 
				+#             for _ in range(nsteps):
			
 
				+#                 if 0 in out_length and now_step < nsteps:
			
 
				+#                     tmp_result = text_pre[now_step, :, :]
			
 
				+#                     out_res[now_step] = tmp_result
			
 
				+#                     tmp_result = tmp_result.topk(1)[1].squeeze(axis=1)
			
 
				+#                     for j in range(b):
			
 
				+#                         if out_length[j] == 0 and tmp_result[j] == 0:
			
 
				+#                             out_length[j] = now_step + 1
			
 
				+#                     now_step += 1
			
 
				+#             for j in range(0, b):
			
 
				+#                 if int(out_length[j]) == 0:
			
 
				+#                     out_length[j] = nsteps
			
 
				+#             start = 0
			
 
				+#             output = paddle.zeros(
			
 
				+#                 shape=[int(out_length.sum()), self.nclass], dtype=x.dtype)
			
 
				+#             for i in range(0, b):
			
 
				+#                 cur_length = int(out_length[i])
			
 
				+#                 output[start:start + cur_length] = out_res[0:cur_length, i, :]
			
 
				+#                 start += cur_length
			
 
				+#             net_out = output
			
 
				+#             length = out_length
			
 
				+#
			
 
				+#         else:  # train mode
			
 
				+#             net_out = preds[0]
			
 
				+#             length = length
			
 
				+#             net_out = paddle.concat([t[:l] for t, l in zip(net_out, length)])
			
 
				+#         text = []
			
 
				+#         if not isinstance(net_out, paddle.Tensor):
			
 
				+#             net_out = paddle.to_tensor(net_out, dtype='float32')
			
 
				+#         net_out = F.softmax(net_out, axis=1)
			
 
				+#         for i in range(0, length.shape[0]):
			
 
				+#             preds_idx = net_out[int(length[:i].sum()):int(length[:i].sum(
			
 
				+#             ) + length[i])].topk(1)[1][:, 0].tolist()
			
 
				+#             preds_text = ''.join([
			
 
				+#                 self.character[idx - 1]
			
 
				+#                 if idx > 0 and idx <= len(self.character) else ''
			
 
				+#                 for idx in preds_idx
			
 
				+#             ])
			
 
				+#             preds_prob = net_out[int(length[:i].sum()):int(length[:i].sum(
			
 
				+#             ) + length[i])].topk(1)[0][:, 0]
			
 
				+#             preds_prob = paddle.exp(
			
 
				+#                 paddle.log(preds_prob).sum() / (preds_prob.shape[0] + 1e-6))
			
 
				+#             text.append((preds_text, preds_prob.numpy()[0]))
			
 
				+#         if label is None:
			
 
				+#             return text
			
 
				+#         label = self.decode(label)
			
 
				+#         return text, label
			
 
				+
			
 
				+
			
 
				+class CANLabelDecode(BaseRecLabelDecode):
			
 
				+    """Convert between latex-symbol and symbol-index"""
			
 
				+
			
 
				+    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
			
 
				+        super(CANLabelDecode, self).__init__(character_dict_path, use_space_char)
			
 
				+
			
 
				+    def decode(self, text_index, preds_prob=None):
			
 
				+        result_list = []
			
 
				+        batch_size = len(text_index)
			
 
				+        for batch_idx in range(batch_size):
			
 
				+            seq_end = text_index[batch_idx].argmin(0)
			
 
				+            idx_list = text_index[batch_idx][:seq_end].tolist()
			
 
				+            symbol_list = [self.character[idx] for idx in idx_list]
			
 
				+            probs = []
			
 
				+            if preds_prob is not None:
			
 
				+                probs = preds_prob[batch_idx][: len(symbol_list)].tolist()
			
 
				+
			
 
				+            result_list.append([" ".join(symbol_list), probs])
			
 
				+        return result_list
			
 
				+
			
 
				+    def __call__(self, preds, label=None, *args, **kwargs):
			
 
				+        pred_prob, _, _, _ = preds
			
 
				+        preds_idx = pred_prob.argmax(axis=2)
			
 
				+
			
 
				+        text = self.decode(preds_idx)
			
 
				+        if label is None:
			
 
				+            return text
			
 
				+        label = self.decode(label)
			
 
				+        return text, label
			
--- a/python/onnxocr/onnxocr/test_images/00006737.jpg
+++ b/python/onnxocr/onnxocr/test_images/00006737.jpg
--- a/python/onnxocr/onnxocr/test_images/00009282.jpg
+++ b/python/onnxocr/onnxocr/test_images/00009282.jpg
--- a/python/onnxocr/onnxocr/test_images/00015504.jpg
+++ b/python/onnxocr/onnxocr/test_images/00015504.jpg
--- a/python/onnxocr/onnxocr/test_images/00018069.jpg
+++ b/python/onnxocr/onnxocr/test_images/00018069.jpg
--- a/python/onnxocr/onnxocr/test_images/00056221.jpg
+++ b/python/onnxocr/onnxocr/test_images/00056221.jpg
--- a/python/onnxocr/onnxocr/test_images/00057937.jpg
+++ b/python/onnxocr/onnxocr/test_images/00057937.jpg
--- a/python/onnxocr/onnxocr/test_images/00059985.jpg
+++ b/python/onnxocr/onnxocr/test_images/00059985.jpg
--- a/python/onnxocr/onnxocr/test_images/00111002.jpg
+++ b/python/onnxocr/onnxocr/test_images/00111002.jpg
--- a/python/onnxocr/onnxocr/test_images/00207393.jpg
+++ b/python/onnxocr/onnxocr/test_images/00207393.jpg
--- a/python/onnxocr/onnxocr/test_images/1.jpg
+++ b/python/onnxocr/onnxocr/test_images/1.jpg
--- a/python/onnxocr/onnxocr/test_images/11.jpg
+++ b/python/onnxocr/onnxocr/test_images/11.jpg
--- a/python/onnxocr/onnxocr/test_images/12.jpg
+++ b/python/onnxocr/onnxocr/test_images/12.jpg
--- a/python/onnxocr/onnxocr/test_images/32e0869f54edcf90cc8e93b981f7235.jpg
+++ b/python/onnxocr/onnxocr/test_images/32e0869f54edcf90cc8e93b981f7235.jpg
--- a/python/onnxocr/onnxocr/test_images/715873facf064583b44ef28295126fa7.jpg
+++ b/python/onnxocr/onnxocr/test_images/715873facf064583b44ef28295126fa7.jpg
--- a/python/onnxocr/onnxocr/test_images/QQ群.jpg
+++ b/python/onnxocr/onnxocr/test_images/QQ群.jpg
--- a/python/onnxocr/onnxocr/test_images/french_0.jpg
+++ b/python/onnxocr/onnxocr/test_images/french_0.jpg
--- a/python/onnxocr/onnxocr/test_images/ger_1.jpg
+++ b/python/onnxocr/onnxocr/test_images/ger_1.jpg
--- a/python/onnxocr/onnxocr/test_images/ger_2.jpg
+++ b/python/onnxocr/onnxocr/test_images/ger_2.jpg
--- a/python/onnxocr/onnxocr/test_images/images/00006737.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/00006737.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1000.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1000.jpg
--- a/python/onnxocr/onnxocr/test_images/images/111.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/111.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1111.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1111.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1112.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1112.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1113.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1113.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1114.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1114.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1115.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1115.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1116.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1116.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1117.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1117.jpg
--- a/python/onnxocr/onnxocr/test_images/images/1118.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/1118.jpg
--- a/python/onnxocr/onnxocr/test_images/images/222.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/222.jpg
--- a/python/onnxocr/onnxocr/test_images/images/333.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/333.jpg
--- a/python/onnxocr/onnxocr/test_images/images/444.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/444.jpg
--- a/python/onnxocr/onnxocr/test_images/images/555.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/555.jpg
--- a/python/onnxocr/onnxocr/test_images/images/666.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/666.jpg
--- a/python/onnxocr/onnxocr/test_images/images/777.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/777.jpg
--- a/python/onnxocr/onnxocr/test_images/images/888.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/888.jpg
--- a/python/onnxocr/onnxocr/test_images/images/999.jpg
+++ b/python/onnxocr/onnxocr/test_images/images/999.jpg
--- a/python/onnxocr/onnxocr/test_images/images/d8fe9983bc7ff2735809b90bc082455.png
+++ b/python/onnxocr/onnxocr/test_images/images/d8fe9983bc7ff2735809b90bc082455.png
--- a/python/onnxocr/onnxocr/test_images/japan_1.jpg
+++ b/python/onnxocr/onnxocr/test_images/japan_1.jpg
--- a/python/onnxocr/onnxocr/test_images/japan_2.jpg
+++ b/python/onnxocr/onnxocr/test_images/japan_2.jpg
--- a/python/onnxocr/onnxocr/test_images/korean_1.jpg
+++ b/python/onnxocr/onnxocr/test_images/korean_1.jpg
--- a/python/onnxocr/onnxocr/test_images/model_prod_flow_ch.png
+++ b/python/onnxocr/onnxocr/test_images/model_prod_flow_ch.png
--- a/python/onnxocr/onnxocr/test_images/myQR.jpg
+++ b/python/onnxocr/onnxocr/test_images/myQR.jpg
--- a/python/onnxocr/onnxocr/test_images/onnxocr_logo.png
+++ b/python/onnxocr/onnxocr/test_images/onnxocr_logo.png
--- a/python/onnxocr/onnxocr/test_images/weixin_pay.jpg
+++ b/python/onnxocr/onnxocr/test_images/weixin_pay.jpg
--- a/python/onnxocr/onnxocr/test_images/zhifubao_pay.jpg
+++ b/python/onnxocr/onnxocr/test_images/zhifubao_pay.jpg
--- a/python/onnxocr/onnxocr/test_images/微信群.jpg
+++ b/python/onnxocr/onnxocr/test_images/微信群.jpg
--- a/python/onnxocr/onnxocr/utils.py
+++ b/python/onnxocr/onnxocr/utils.py
@@ -0,0 +1,387 @@
 
				+import numpy as np
			
 
				+import cv2
			
 
				+import argparse
			
 
				+import math
			
 
				+from PIL import Image, ImageDraw, ImageFont
			
 
				+from pathlib import Path
			
 
				+
			
 
				+# 获取当前文件所在的目录
			
 
				+module_dir = Path(__file__).resolve().parent
			
 
				+
			
 
				+
			
 
				+def get_rotate_crop_image(img, points):
			
 
				+    """
			
 
				+    img_height, img_width = img.shape[0:2]
			
 
				+    left = int(np.min(points[:, 0]))
			
 
				+    right = int(np.max(points[:, 0]))
			
 
				+    top = int(np.min(points[:, 1]))
			
 
				+    bottom = int(np.max(points[:, 1]))
			
 
				+    img_crop = img[top:bottom, left:right, :].copy()
			
 
				+    points[:, 0] = points[:, 0] - left
			
 
				+    points[:, 1] = points[:, 1] - top
			
 
				+    """
			
 
				+    assert len(points) == 4, "shape of points must be 4*2"
			
 
				+    img_crop_width = int(
			
 
				+        max(
			
 
				+            np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3])
			
 
				+        )
			
 
				+    )
			
 
				+    img_crop_height = int(
			
 
				+        max(
			
 
				+            np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2])
			
 
				+        )
			
 
				+    )
			
 
				+    pts_std = np.float32(
			
 
				+        [
			
 
				+            [0, 0],
			
 
				+            [img_crop_width, 0],
			
 
				+            [img_crop_width, img_crop_height],
			
 
				+            [0, img_crop_height],
			
 
				+        ]
			
 
				+    )
			
 
				+    M = cv2.getPerspectiveTransform(points, pts_std)
			
 
				+    dst_img = cv2.warpPerspective(
			
 
				+        img,
			
 
				+        M,
			
 
				+        (img_crop_width, img_crop_height),
			
 
				+        borderMode=cv2.BORDER_REPLICATE,
			
 
				+        flags=cv2.INTER_CUBIC,
			
 
				+    )
			
 
				+    dst_img_height, dst_img_width = dst_img.shape[0:2]
			
 
				+    if dst_img_height * 1.0 / dst_img_width >= 1.5:
			
 
				+        dst_img = np.rot90(dst_img)
			
 
				+    return dst_img
			
 
				+
			
 
				+
			
 
				+def get_minarea_rect_crop(img, points):
			
 
				+    bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32))
			
 
				+    points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
			
 
				+
			
 
				+    index_a, index_b, index_c, index_d = 0, 1, 2, 3
			
 
				+    if points[1][1] > points[0][1]:
			
 
				+        index_a = 0
			
 
				+        index_d = 1
			
 
				+    else:
			
 
				+        index_a = 1
			
 
				+        index_d = 0
			
 
				+    if points[3][1] > points[2][1]:
			
 
				+        index_b = 2
			
 
				+        index_c = 3
			
 
				+    else:
			
 
				+        index_b = 3
			
 
				+        index_c = 2
			
 
				+
			
 
				+    box = [points[index_a], points[index_b], points[index_c], points[index_d]]
			
 
				+    crop_img = get_rotate_crop_image(img, np.array(box))
			
 
				+    return crop_img
			
 
				+
			
 
				+
			
 
				+def resize_img(img, input_size=600):
			
 
				+    """
			
 
				+    resize img and limit the longest side of the image to input_size
			
 
				+    """
			
 
				+    img = np.array(img)
			
 
				+    im_shape = img.shape
			
 
				+    im_size_max = np.max(im_shape[0:2])
			
 
				+    im_scale = float(input_size) / float(im_size_max)
			
 
				+    img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
			
 
				+    return img
			
 
				+
			
 
				+
			
 
				+def str_count(s):
			
 
				+    """
			
 
				+    Count the number of Chinese characters,
			
 
				+    a single English character and a single number
			
 
				+    equal to half the length of Chinese characters.
			
 
				+    args:
			
 
				+        s(string): the input of string
			
 
				+    return(int):
			
 
				+        the number of Chinese characters
			
 
				+    """
			
 
				+    import string
			
 
				+
			
 
				+    count_zh = count_pu = 0
			
 
				+    s_len = len(str(s))
			
 
				+    en_dg_count = 0
			
 
				+    for c in str(s):
			
 
				+        if c in string.ascii_letters or c.isdigit() or c.isspace():
			
 
				+            en_dg_count += 1
			
 
				+        elif c.isalpha():
			
 
				+            count_zh += 1
			
 
				+        else:
			
 
				+            count_pu += 1
			
 
				+    return s_len - math.ceil(en_dg_count / 2)
			
 
				+
			
 
				+
			
 
				+def text_visual(
			
 
				+    texts,
			
 
				+    scores,
			
 
				+    img_h=400,
			
 
				+    img_w=600,
			
 
				+    threshold=0.0,
			
 
				+    font_path=str(module_dir / "fonts/simfang.ttf"),
			
 
				+):
			
 
				+    """
			
 
				+    create new blank img and draw txt on it
			
 
				+    args:
			
 
				+        texts(list): the text will be draw
			
 
				+        scores(list|None): corresponding score of each txt
			
 
				+        img_h(int): the height of blank img
			
 
				+        img_w(int): the width of blank img
			
 
				+        font_path: the path of font which is used to draw text
			
 
				+    return(array):
			
 
				+    """
			
 
				+    if scores is not None:
			
 
				+        assert len(texts) == len(
			
 
				+            scores
			
 
				+        ), "The number of txts and corresponding scores must match"
			
 
				+
			
 
				+    def create_blank_img():
			
 
				+        blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255
			
 
				+        blank_img[:, img_w - 1 :] = 0
			
 
				+        blank_img = Image.fromarray(blank_img).convert("RGB")
			
 
				+        draw_txt = ImageDraw.Draw(blank_img)
			
 
				+        return blank_img, draw_txt
			
 
				+
			
 
				+    blank_img, draw_txt = create_blank_img()
			
 
				+
			
 
				+    font_size = 20
			
 
				+    txt_color = (0, 0, 0)
			
 
				+    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
			
 
				+
			
 
				+    gap = font_size + 5
			
 
				+    txt_img_list = []
			
 
				+    count, index = 1, 0
			
 
				+    for idx, txt in enumerate(texts):
			
 
				+        index += 1
			
 
				+        if scores[idx] < threshold or math.isnan(scores[idx]):
			
 
				+            index -= 1
			
 
				+            continue
			
 
				+        first_line = True
			
 
				+        while str_count(txt) >= img_w // font_size - 4:
			
 
				+            tmp = txt
			
 
				+            txt = tmp[: img_w // font_size - 4]
			
 
				+            if first_line:
			
 
				+                new_txt = str(index) + ": " + txt
			
 
				+                first_line = False
			
 
				+            else:
			
 
				+                new_txt = "    " + txt
			
 
				+            draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
			
 
				+            txt = tmp[img_w // font_size - 4 :]
			
 
				+            if count >= img_h // gap - 1:
			
 
				+                txt_img_list.append(np.array(blank_img))
			
 
				+                blank_img, draw_txt = create_blank_img()
			
 
				+                count = 0
			
 
				+            count += 1
			
 
				+        if first_line:
			
 
				+            new_txt = str(index) + ": " + txt + "   " + "%.3f" % (scores[idx])
			
 
				+        else:
			
 
				+            new_txt = "  " + txt + "  " + "%.3f" % (scores[idx])
			
 
				+        draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
			
 
				+        # whether add new blank img or not
			
 
				+        if count >= img_h // gap - 1 and idx + 1 < len(texts):
			
 
				+            txt_img_list.append(np.array(blank_img))
			
 
				+            blank_img, draw_txt = create_blank_img()
			
 
				+            count = 0
			
 
				+        count += 1
			
 
				+    txt_img_list.append(np.array(blank_img))
			
 
				+    if len(txt_img_list) == 1:
			
 
				+        blank_img = np.array(txt_img_list[0])
			
 
				+    else:
			
 
				+        blank_img = np.concatenate(txt_img_list, axis=1)
			
 
				+    return np.array(blank_img)
			
 
				+
			
 
				+
			
 
				+def draw_ocr(
			
 
				+    image,
			
 
				+    boxes,
			
 
				+    txts=None,
			
 
				+    scores=None,
			
 
				+    drop_score=0.5,
			
 
				+    font_path=str(module_dir / "fonts/simfang.ttf"),
			
 
				+):
			
 
				+    """
			
 
				+    Visualize the results of OCR detection and recognition
			
 
				+    args:
			
 
				+        image(Image|array): RGB image
			
 
				+        boxes(list): boxes with shape(N, 4, 2)
			
 
				+        txts(list): the texts
			
 
				+        scores(list): txxs corresponding scores
			
 
				+        drop_score(float): only scores greater than drop_threshold will be visualized
			
 
				+        font_path: the path of font which is used to draw text
			
 
				+    return(array):
			
 
				+        the visualized img
			
 
				+    """
			
 
				+    if scores is None:
			
 
				+        scores = [1] * len(boxes)
			
 
				+    box_num = len(boxes)
			
 
				+    for i in range(box_num):
			
 
				+        if scores is not None and (scores[i] < drop_score or math.isnan(scores[i])):
			
 
				+            continue
			
 
				+        box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64)
			
 
				+        image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
			
 
				+    if txts is not None:
			
 
				+        img = np.array(resize_img(image, input_size=600))
			
 
				+        txt_img = text_visual(
			
 
				+            txts,
			
 
				+            scores,
			
 
				+            img_h=img.shape[0],
			
 
				+            img_w=600,
			
 
				+            threshold=drop_score,
			
 
				+            font_path=font_path,
			
 
				+        )
			
 
				+        img = np.concatenate([np.array(img), np.array(txt_img)], axis=1)
			
 
				+        return img
			
 
				+    return image
			
 
				+
			
 
				+
			
 
				+def base64_to_cv2(b64str):
			
 
				+    import base64
			
 
				+
			
 
				+    data = base64.b64decode(b64str.encode("utf8"))
			
 
				+    data = np.frombuffer(data, np.uint8)
			
 
				+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
			
 
				+    return data
			
 
				+
			
 
				+
			
 
				+def str2bool(v):
			
 
				+    return v.lower() in ("true", "t", "1")
			
 
				+
			
 
				+
			
 
				+def infer_args():
			
 
				+    parser = argparse.ArgumentParser()
			
 
				+    # params for prediction engine
			
 
				+    parser.add_argument("--use_gpu", type=str2bool, default=True)
			
 
				+    parser.add_argument("--use_xpu", type=str2bool, default=False)
			
 
				+    parser.add_argument("--use_npu", type=str2bool, default=False)
			
 
				+    parser.add_argument("--ir_optim", type=str2bool, default=True)
			
 
				+    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
			
 
				+    parser.add_argument("--min_subgraph_size", type=int, default=15)
			
 
				+    parser.add_argument("--precision", type=str, default="fp32")
			
 
				+    parser.add_argument("--gpu_mem", type=int, default=500)
			
 
				+    parser.add_argument("--gpu_id", type=int, default=0)
			
 
				+
			
 
				+    # params for text detector
			
 
				+    parser.add_argument("--image_dir", type=str)
			
 
				+    parser.add_argument("--page_num", type=int, default=0)
			
 
				+    parser.add_argument("--det_algorithm", type=str, default="DB")
			
 
				+    parser.add_argument(
			
 
				+        "--det_model_dir",
			
 
				+        type=str,
			
 
				+        default=str(module_dir / "models/ppocrv5/det/det.onnx"),
			
 
				+    )
			
 
				+    parser.add_argument("--det_limit_side_len", type=float, default=960)
			
 
				+    parser.add_argument("--det_limit_type", type=str, default="max")
			
 
				+    parser.add_argument("--det_box_type", type=str, default="quad")
			
 
				+
			
 
				+    # DB parmas
			
 
				+    parser.add_argument("--det_db_thresh", type=float, default=0.3)
			
 
				+    parser.add_argument("--det_db_box_thresh", type=float, default=0.6)
			
 
				+    parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5)
			
 
				+    parser.add_argument("--max_batch_size", type=int, default=10)
			
 
				+    parser.add_argument("--use_dilation", type=str2bool, default=False)
			
 
				+    parser.add_argument("--det_db_score_mode", type=str, default="fast")
			
 
				+
			
 
				+    # EAST parmas
			
 
				+    parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
			
 
				+    parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
			
 
				+    parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
			
 
				+
			
 
				+    # SAST params
			
 
				+    parser.add_argument("--det_sast_score_thresh", type=float, default=0.5)
			
 
				+    parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2)
			
 
				+
			
 
				+    # PSE params
			
 
				+    parser.add_argument("--det_pse_thresh", type=float, default=0)
			
 
				+    parser.add_argument("--det_pse_box_thresh", type=float, default=0.85)
			
 
				+    parser.add_argument("--det_pse_min_area", type=float, default=16)
			
 
				+    parser.add_argument("--det_pse_scale", type=int, default=1)
			
 
				+
			
 
				+    # FCE params
			
 
				+    parser.add_argument("--scales", type=list, default=[8, 16, 32])
			
 
				+    parser.add_argument("--alpha", type=float, default=1.0)
			
 
				+    parser.add_argument("--beta", type=float, default=1.0)
			
 
				+    parser.add_argument("--fourier_degree", type=int, default=5)
			
 
				+
			
 
				+    # params for text recognizer
			
 
				+    parser.add_argument("--rec_algorithm", type=str, default="SVTR_LCNet")
			
 
				+    parser.add_argument(
			
 
				+        "--rec_model_dir",
			
 
				+        type=str,
			
 
				+        default=str(module_dir / "models/ppocrv5/rec/rec.onnx"),
			
 
				+    )
			
 
				+    parser.add_argument("--rec_image_inverse", type=str2bool, default=True)
			
 
				+    parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320")
			
 
				+    parser.add_argument("--rec_batch_num", type=int, default=6)
			
 
				+    parser.add_argument("--max_text_length", type=int, default=25)
			
 
				+    parser.add_argument(
			
 
				+        "--rec_char_dict_path",
			
 
				+        type=str,
			
 
				+        default=str(module_dir / "models/ppocrv5/ppocrv5_dict.txt"),
			
 
				+    )
			
 
				+    parser.add_argument("--use_space_char", type=str2bool, default=True)
			
 
				+    parser.add_argument(
			
 
				+        "--vis_font_path", type=str, default=str(module_dir / "fonts/simfang.ttf")
			
 
				+    )
			
 
				+    parser.add_argument("--drop_score", type=float, default=0.5)
			
 
				+
			
 
				+    # params for e2e
			
 
				+    parser.add_argument("--e2e_algorithm", type=str, default="PGNet")
			
 
				+    parser.add_argument("--e2e_model_dir", type=str)
			
 
				+    parser.add_argument("--e2e_limit_side_len", type=float, default=768)
			
 
				+    parser.add_argument("--e2e_limit_type", type=str, default="max")
			
 
				+
			
 
				+    # PGNet params
			
 
				+    parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5)
			
 
				+    parser.add_argument(
			
 
				+        "--e2e_char_dict_path",
			
 
				+        type=str,
			
 
				+        default=str(module_dir / "ppocr/utils/ic15_dict.txt"),
			
 
				+    )
			
 
				+    parser.add_argument("--e2e_pgnet_valid_set", type=str, default="totaltext")
			
 
				+    parser.add_argument("--e2e_pgnet_mode", type=str, default="fast")
			
 
				+
			
 
				+    # params for text classifier
			
 
				+    parser.add_argument("--use_angle_cls", type=str2bool, default=False)
			
 
				+    parser.add_argument(
			
 
				+        "--cls_model_dir",
			
 
				+        type=str,
			
 
				+        default=str(module_dir / "models/ppocrv4/cls/cls.onnx"),
			
 
				+    )
			
 
				+    parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
			
 
				+    parser.add_argument("--label_list", type=list, default=["0", "180"])
			
 
				+    parser.add_argument("--cls_batch_num", type=int, default=6)
			
 
				+    parser.add_argument("--cls_thresh", type=float, default=0.9)
			
 
				+
			
 
				+    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
			
 
				+    parser.add_argument("--cpu_threads", type=int, default=10)
			
 
				+    parser.add_argument("--use_pdserving", type=str2bool, default=False)
			
 
				+    parser.add_argument("--warmup", type=str2bool, default=False)
			
 
				+
			
 
				+    # SR params
			
 
				+    parser.add_argument("--sr_model_dir", type=str)
			
 
				+    parser.add_argument("--sr_image_shape", type=str, default="3, 32, 128")
			
 
				+    parser.add_argument("--sr_batch_num", type=int, default=1)
			
 
				+
			
 
				+    #
			
 
				+    parser.add_argument(
			
 
				+        "--draw_img_save_dir", type=str, default=str(module_dir / "inference_results")
			
 
				+    )
			
 
				+    parser.add_argument("--save_crop_res", type=str2bool, default=False)
			
 
				+    parser.add_argument(
			
 
				+        "--crop_res_save_dir", type=str, default=str(module_dir / "output")
			
 
				+    )
			
 
				+
			
 
				+    # multi-process
			
 
				+    parser.add_argument("--use_mp", type=str2bool, default=False)
			
 
				+    parser.add_argument("--total_process_num", type=int, default=1)
			
 
				+    parser.add_argument("--process_id", type=int, default=0)
			
 
				+
			
 
				+    parser.add_argument("--benchmark", type=str2bool, default=False)
			
 
				+    parser.add_argument(
			
 
				+        "--save_log_path", type=str, default=str(module_dir / "log_output/")
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument("--show_log", type=str2bool, default=True)
			
 
				+    parser.add_argument("--use_onnx", type=str2bool, default=False)
			
 
				+    return parser
			
--- a/python/onnxocr/result_img/555.png
+++ b/python/onnxocr/result_img/555.png
--- a/python/onnxocr/result_img/draw_ocr.jpg
+++ b/python/onnxocr/result_img/draw_ocr.jpg
--- a/python/onnxocr/result_img/draw_ocr2.jpg
+++ b/python/onnxocr/result_img/draw_ocr2.jpg
--- a/python/onnxocr/result_img/draw_ocr3.jpg
+++ b/python/onnxocr/result_img/draw_ocr3.jpg
--- a/python/onnxocr/result_img/draw_ocr4.jpg
+++ b/python/onnxocr/result_img/draw_ocr4.jpg
--- a/python/onnxocr/result_img/draw_ocr5.jpg
+++ b/python/onnxocr/result_img/draw_ocr5.jpg
--- a/python/onnxocr/result_img/draw_ocr_1.jpg
+++ b/python/onnxocr/result_img/draw_ocr_1.jpg
--- a/python/onnxocr/result_img/r1.png
+++ b/python/onnxocr/result_img/r1.png
--- a/python/onnxocr/result_img/r2.png
+++ b/python/onnxocr/result_img/r2.png
--- a/python/onnxocr/result_img/r3.png
+++ b/python/onnxocr/result_img/r3.png
--- a/python/onnxocr/static/webui.css
+++ b/python/onnxocr/static/webui.css
@@ -0,0 +1,313 @@
 
				+body {
			
 
				+    font-family: 'Segoe UI', 'Microsoft YaHei', Arial, sans-serif;
			
 
				+    background: linear-gradient(135deg, #e0e7ff 0%, #f8fafc 100%);
			
 
				+    margin: 0;
			
 
				+    min-height: 100vh;
			
 
				+    width: 100vw;
			
 
				+    box-sizing: border-box;
			
 
				+}
			
 
				+.container {
			
 
				+    width: 90vw;
			
 
				+    min-height: 80vh;
			
 
				+    margin: 4vh auto 0 auto;
			
 
				+    background: #fff;
			
 
				+    border-radius: 22px;
			
 
				+    box-shadow: 0 10px 40px #7b8cff22, 0 1.5px 8px #0001;
			
 
				+    padding: 56px 0 40px 0;
			
 
				+    display: flex;
			
 
				+    flex-direction: column;
			
 
				+    align-items: center;
			
 
				+    justify-content: flex-start;
			
 
				+    max-width: 1100px;
			
 
				+    transition: box-shadow 0.3s;
			
 
				+}
			
 
				+h2 {
			
 
				+    text-align: center;
			
 
				+    margin-bottom: 36px;
			
 
				+    font-size: 2.5rem;
			
 
				+    color: #2d3a4a;
			
 
				+    letter-spacing: 2px;
			
 
				+    font-weight: 700;
			
 
				+    text-shadow: 0 2px 8px #7b8cff22;
			
 
				+}
			
 
				+.form-row {
			
 
				+    margin-bottom: 22px;
			
 
				+    display: flex;
			
 
				+    align-items: center;
			
 
				+    justify-content: center;
			
 
				+    width: 100%;
			
 
				+}
			
 
				+.form-row label {
			
 
				+    margin-right: 12px;
			
 
				+    font-weight: 500;
			
 
				+    color: #3b4252;
			
 
				+}
			
 
				+#fileInput {
			
 
				+    display: none;
			
 
				+}
			
 
				+.drop-zone {
			
 
				+    border: 2.5px dashed #7b8cff;
			
 
				+    border-radius: 14px;
			
 
				+    padding: 48px 0;
			
 
				+    text-align: center;
			
 
				+    color: #5a5a5a;
			
 
				+    background: #f4f7ff;
			
 
				+    cursor: pointer;
			
 
				+    margin-bottom: 20px;
			
 
				+    font-size: 1.15rem;
			
 
				+    transition: background 0.2s, border-color 0.2s;
			
 
				+    width: 96%;
			
 
				+    min-width: 320px;
			
 
				+    max-width: 1200px;
			
 
				+    margin-left: auto;
			
 
				+    margin-right: auto;
			
 
				+    display: flex;
			
 
				+    align-items: center;
			
 
				+    justify-content: center;
			
 
				+    min-height: 120px;
			
 
				+    box-sizing: border-box;
			
 
				+    box-shadow: 0 2px 12px #7b8cff11;
			
 
				+}
			
 
				+.drop-zone.dragover {
			
 
				+    background: #e0eaff;
			
 
				+    border-color: #3b82f6;
			
 
				+}
			
 
				+#resultArea {
			
 
				+    margin-top: 32px;
			
 
				+    width: 90%;
			
 
				+    max-width: 900px;
			
 
				+}
			
 
				+.download-row {
			
 
				+    display: flex;
			
 
				+    justify-content: flex-end;
			
 
				+    margin-top: 8px;
			
 
				+    width: 90%;
			
 
				+    max-width: 900px;
			
 
				+}
			
 
				+.result-block, #previewArea {
			
 
				+    display: flex;
			
 
				+    align-items: flex-start;
			
 
				+    gap: 18px;
			
 
				+}
			
 
				+.ocr-image-preview {
			
 
				+    max-width: 320px;
			
 
				+    max-height: 220px;
			
 
				+    border-radius: 8px;
			
 
				+    box-shadow: 0 1px 8px #0002;
			
 
				+    background: #fff;
			
 
				+    margin-right: 8px;
			
 
				+}
			
 
				+.ocr-text-content {
			
 
				+    flex: 1;
			
 
				+    min-width: 0;
			
 
				+    max-height: 220px;
			
 
				+    overflow-y: auto;
			
 
				+    background: #f7faff;
			
 
				+    border-radius: 8px;
			
 
				+    padding: 12px 16px;
			
 
				+    box-sizing: border-box;
			
 
				+    box-shadow: 0 1px 4px #0001;
			
 
				+    display: flex;
			
 
				+    flex-direction: column;
			
 
				+    position: relative;
			
 
				+}
			
 
				+.copy-btn {
			
 
				+    background: none;
			
 
				+    border: none;
			
 
				+    cursor: pointer;
			
 
				+    padding: 2px;
			
 
				+    margin-left: 8px;
			
 
				+    border-radius: 4px;
			
 
				+    transition: background 0.2s;
			
 
				+    display: flex;
			
 
				+    align-items: center;
			
 
				+}
			
 
				+.copy-btn:hover {
			
 
				+    background: #e0eaff;
			
 
				+}
			
 
				+.copy-btn svg {
			
 
				+    display: block;
			
 
				+}
			
 
				+.result-block {
			
 
				+    background: #f7faff;
			
 
				+    border-radius: 8px;
			
 
				+    padding: 0;
			
 
				+    margin-bottom: 14px;
			
 
				+    border-left: 4px solid #7b8cff;
			
 
				+    box-shadow: 0 1px 4px #0001;
			
 
				+}
			
 
				+.ocr-text-content pre {
			
 
				+    white-space: pre-wrap;
			
 
				+    word-break: break-all;
			
 
				+    font-size: 1.01rem;
			
 
				+    color: #2d3a4a;
			
 
				+    margin: 0;
			
 
				+}
			
 
				+#downloadBtn {
			
 
				+    margin-top: 22px;
			
 
				+    display: none;
			
 
				+    background: linear-gradient(90deg, #7b8cff 0%, #3b82f6 100%);
			
 
				+    color: #fff;
			
 
				+    border: none;
			
 
				+    border-radius: 6px;
			
 
				+    padding: 10px 28px;
			
 
				+    font-size: 1.08rem;
			
 
				+    cursor: pointer;
			
 
				+    box-shadow: 0 2px 8px #7b8cff22;
			
 
				+    transition: background 0.2s;
			
 
				+}
			
 
				+#downloadBtn:hover {
			
 
				+    background: linear-gradient(90deg, #3b82f6 0%, #7b8cff 100%);
			
 
				+}
			
 
				+#loading {
			
 
				+    display: none;
			
 
				+    text-align: center;
			
 
				+    margin-top: 20px;
			
 
				+    color: #3b82f6;
			
 
				+    font-size: 1.1rem;
			
 
				+}
			
 
				+#fileList {
			
 
				+    margin: 0;
			
 
				+    padding: 0 0 0 20px;
			
 
				+    color: #444;
			
 
				+    font-size: 15px;
			
 
				+    min-height: 24px;
			
 
				+}
			
 
				+#clearBtn {
			
 
				+    margin-left: 18px;
			
 
				+    background: #f3f4f6;
			
 
				+    color: #3b4252;
			
 
				+    border: 1.5px solid #d1d5db;
			
 
				+    border-radius: 6px;
			
 
				+    padding: 7px 18px;
			
 
				+    font-size: 1rem;
			
 
				+    cursor: pointer;
			
 
				+    transition: background 0.2s, border-color 0.2s;
			
 
				+}
			
 
				+#clearBtn:hover {
			
 
				+    background: #e0eaff;
			
 
				+    border-color: #7b8cff;
			
 
				+}
			
 
				+button[type="submit"] {
			
 
				+    background: linear-gradient(90deg, #7b8cff 0%, #3b82f6 100%);
			
 
				+    color: #fff;
			
 
				+    border: none;
			
 
				+    border-radius: 6px;
			
 
				+    padding: 10px 32px;
			
 
				+    font-size: 1.08rem;
			
 
				+    cursor: pointer;
			
 
				+    box-shadow: 0 2px 8px #7b8cff22;
			
 
				+    transition: background 0.2s;
			
 
				+    display: block;
			
 
				+    margin: 0 auto;
			
 
				+}
			
 
				+button[type="submit"]:hover {
			
 
				+    background: linear-gradient(90deg, #3b82f6 0%, #7b8cff 100%);
			
 
				+}
			
 
				+select {
			
 
				+    border-radius: 6px;
			
 
				+    border: 1.5px solid #d1d5db;
			
 
				+    padding: 7px 16px;
			
 
				+    font-size: 1rem;
			
 
				+    background: #f9fafb;
			
 
				+    color: #2d3a4a;
			
 
				+    outline: none;
			
 
				+    transition: border-color 0.2s;
			
 
				+}
			
 
				+select:focus {
			
 
				+    border-color: #7b8cff;
			
 
				+}
			
 
				+#progressArea {
			
 
				+    width: 80%;
			
 
				+    margin: 32px 0 0 0;
			
 
				+    display: none;
			
 
				+    flex-direction: column;
			
 
				+    align-items: center;
			
 
				+    gap: 10px;
			
 
				+}
			
 
				+.progress-bar-bg {
			
 
				+    width: 80%;
			
 
				+    height: 16px;
			
 
				+    background: #e0eaff;
			
 
				+    border-radius: 8px;
			
 
				+    overflow: hidden;
			
 
				+    margin-bottom: 4px;
			
 
				+}
			
 
				+.progress-bar {
			
 
				+    height: 100%;
			
 
				+    background: linear-gradient(90deg, #7b8cff 0%, #3b82f6 100%);
			
 
				+    border-radius: 8px;
			
 
				+    width: 0%;
			
 
				+    transition: width 0.3s;
			
 
				+}
			
 
				+#progressText {
			
 
				+    color: #3b4252;
			
 
				+    font-size: 1rem;
			
 
				+    margin-bottom: 2px;
			
 
				+}
			
 
				+#elapsedTime {
			
 
				+    color: #888;
			
 
				+    font-size: 0.98rem;
			
 
				+    margin-top: 2px;
			
 
				+}
			
 
				+@media (max-width: 900px) {
			
 
				+    .container {
			
 
				+        width: 98vw;
			
 
				+        min-height: 90vh;
			
 
				+        padding: 18px 0 18px 0;
			
 
				+        border-radius: 0;
			
 
				+        box-shadow: none;
			
 
				+    }
			
 
				+    .drop-zone {
			
 
				+        width: 96vw;
			
 
				+        min-width: 0;
			
 
				+        max-width: 100vw;
			
 
				+    }
			
 
				+}
			
 
				+@media (max-width: 600px) {
			
 
				+    .container {
			
 
				+        width: 100vw;
			
 
				+        min-height: 100vh;
			
 
				+        padding: 0;
			
 
				+        border-radius: 0;
			
 
				+        box-shadow: none;
			
 
				+    }
			
 
				+    h2 {
			
 
				+        font-size: 1.3rem;
			
 
				+    }
			
 
				+    .form-row {
			
 
				+        flex-direction: column;
			
 
				+        align-items: stretch;
			
 
				+        width: 100%;
			
 
				+    }
			
 
				+    .form-row label {
			
 
				+        margin-bottom: 6px;
			
 
				+    }
			
 
				+    .drop-zone {
			
 
				+        width: 98vw;
			
 
				+        min-width: 0;
			
 
				+        max-width: 100vw;
			
 
				+        padding: 28px 0;
			
 
				+        min-height: 60px;
			
 
				+        font-size: 1rem;
			
 
				+    }
			
 
				+    #downloadBtn, button[type="submit"] {
			
 
				+        width: 100%;
			
 
				+        margin-left: 0;
			
 
				+        margin-top: 10px;
			
 
				+    }
			
 
				+    #clearBtn {
			
 
				+        width: 100%;
			
 
				+        min-width: 0;
			
 
				+        max-width: 100vw;
			
 
				+        font-size: 0.98rem;
			
 
				+        padding: 7px 10px;
			
 
				+        margin-top: 10px;
			
 
				+        margin-left: 0;
			
 
				+        align-self: stretch;
			
 
				+    }
			
 
				+    #resultArea, .download-row {
			
 
				+        width: 98vw;
			
 
				+        max-width: 100vw;
			
 
				+    }
			
 
				+}
			
--- a/python/onnxocr/templates/index.html
+++ b/python/onnxocr/templates/index.html
@@ -0,0 +1,269 @@
 
				+<!DOCTYPE html>
			
 
				+<html lang="zh-CN">
			
 
				+<head>
			
 
				+    <meta charset="UTF-8" />
			
 
				+    <title>OCR</title>
			
 
				+    <style>
			
 
				+        * {
			
 
				+            box-sizing: border-box;
			
 
				+            margin: 0;
			
 
				+            padding: 0;
			
 
				+        }
			
 
				+
			
 
				+        body, html {
			
 
				+            height: 100%;
			
 
				+            font-family: sans-serif;
			
 
				+            background-color: #ffffff;
			
 
				+        }
			
 
				+
			
 
				+        .container {
			
 
				+            display: flex;
			
 
				+            flex-direction: column;
			
 
				+            align-items: center;
			
 
				+            min-height: 100vh;
			
 
				+            padding: 5px 20px 20px 20px;
			
 
				+        }
			
 
				+
			
 
				+        .drop-zone {
			
 
				+            border: 2px dashed #888;
			
 
				+            border-radius: 10px;
			
 
				+            width: 400px;
			
 
				+            height: 150px;
			
 
				+            display: flex;
			
 
				+            align-items: center;
			
 
				+            justify-content: center;
			
 
				+            text-align: center;
			
 
				+            color: #555;
			
 
				+            font-size: 16px;
			
 
				+            cursor: pointer;
			
 
				+            transition: background-color 0.3s;
			
 
				+            margin-bottom: 20px;
			
 
				+        }
			
 
				+
			
 
				+        .drop-zone.dragover {
			
 
				+            background-color: #f0f0f0;
			
 
				+        }
			
 
				+
			
 
				+        .canvas-container {
			
 
				+            display: none;
			
 
				+            gap: 20px;
			
 
				+            flex-wrap: wrap;
			
 
				+            justify-content: center;
			
 
				+            margin-top: 20px;
			
 
				+        }
			
 
				+
			
 
				+        canvas {
			
 
				+            border: 1px solid #ccc;
			
 
				+            max-width: 600px;
			
 
				+            height: auto;
			
 
				+        }
			
 
				+
			
 
				/* Loading animation: full-viewport overlay that dims the page while a request is pending. */
				.loading-overlay {
				    position: fixed;
				    top: 0;
				    left: 0;
				    right: 0;
				    bottom: 0;
				    background: rgba(255, 255, 255, 0.7);
				    /* Hidden by default. The previous rule also declared "display: flex" earlier,
				       which this later declaration always overrode; the dead declaration is removed.
				       The centering properties below apply once the script sets display to "flex". */
				    display: none;
				    align-items: center;
				    justify-content: center;
				    z-index: 9999;
				}
			
 
				+
			
 
				+        .spinner {
			
 
				+            width: 50px;
			
 
				+            height: 50px;
			
 
				+            border: 5px solid #ccc;
			
 
				+            border-top: 5px solid #3498db;
			
 
				+            border-radius: 50%;
			
 
				+            animation: spin 1s linear infinite;
			
 
				+        }
			
 
				+
			
 
				+        @keyframes spin {
			
 
				+            0% { transform: rotate(0deg); }
			
 
				+            100% { transform: rotate(360deg); }
			
 
				+        }
			
 
				+    </style>
			
 
				+</head>
			
 
				+<body>
			
 
				+
			
 
				+<div class="container">
			
 
				+    <h2>OCR</h2>
			
 
				+    <div id="dropZone" class="drop-zone">
			
 
				+        点击或拖动图片到这里上传
			
 
				+    </div>
			
 
				+
			
 
				+    <!-- 加载动画 -->
			
 
				+    <div id="loadingOverlay" class="loading-overlay">
			
 
				+        <div class="spinner"></div>
			
 
				+    </div>
			
 
				+
			
 
				+    <!-- 结果区域 -->
			
 
				+    <div id="canvasContainer" class="canvas-container">
			
 
				+        <canvas id="canvasOriginal"></canvas>
			
 
				+        <canvas id="canvasTextOnly"></canvas>
			
 
				+    </div>
			
 
				+</div>
			
 
				+
			
 
				+<script>
			
 
				// DOM handles used by the upload flow. The file input is created in script and
				// is only used to open the native file picker.
				const dropZone = document.getElementById('dropZone');
				const fileInput = document.createElement('input');
				fileInput.type = 'file';
				fileInput.accept = 'image/*';

				// Width, in pixels, that an uploaded image is scaled to before it is drawn.
				const MAX_DISPLAY_WIDTH = 500;

				const canvasOriginal = document.getElementById('canvasOriginal');
				const ctxOriginal = canvasOriginal.getContext('2d');

				const canvasTextOnly = document.getElementById('canvasTextOnly');
				const ctxTextOnly = canvasTextOnly.getContext('2d');

				const canvasContainer = document.getElementById('canvasContainer');
				const loadingOverlay = document.getElementById('loadingOverlay');

				// Click to upload.
				dropZone.addEventListener('click', () => {
				    // Clear the previous selection first: a file input does not fire "change"
				    // when the user picks the same file again, so re-running OCR on the same
				    // image would otherwise do nothing.
				    fileInput.value = '';
				    fileInput.click();
				});

				// Drag-and-drop upload.
				dropZone.addEventListener('dragover', (e) => {
				    // preventDefault marks the element as a valid drop target.
				    e.preventDefault();
				    dropZone.classList.add('dragover');
				});

				dropZone.addEventListener('dragleave', () => {
				    dropZone.classList.remove('dragover');
				});

				dropZone.addEventListener('drop', (e) => {
				    e.preventDefault();
				    dropZone.classList.remove('dragover');
				    // Only the first dropped file is processed.
				    const file = e.dataTransfer.files[0];
				    if (file && file.type.startsWith('image/')) {
				        handleImage(file);
				    } else {
				        alert("请上传图片文件");
				    }
				});

				fileInput.addEventListener('change', () => {
				    const file = fileInput.files[0];
				    if (file) {
				        handleImage(file);
				    }
				});
			
 
				+
			
 
				/**
				 * Read an image file, draw it on the "original" canvas scaled to
				 * MAX_DISPLAY_WIDTH, show the loading overlay and submit the image for OCR.
				 *
				 * @param {File} file - Image chosen through the file picker or dropped on the page.
				 */
				function handleImage(file) {
				    const reader = new FileReader();

				    // Without this handler an unreadable file fails silently.
				    reader.onerror = function () {
				        console.error("Failed to read the selected file", reader.error);
				        alert("请上传图片文件");
				    };

				    reader.onload = function (e) {
				        const dataUrl = e.target.result;
				        const img = new Image();

				        // Fires when the file content cannot be decoded as an image.
				        img.onerror = function () {
				            alert("请上传图片文件");
				        };

				        img.onload = function () {
				            // A zero-sized image would turn the scale factor into Infinity/NaN.
				            if (img.width === 0 || img.height === 0) {
				                alert("请上传图片文件");
				                return;
				            }

				            const scale = MAX_DISPLAY_WIDTH / img.width;
				            // Canvas dimensions are integers; round here so the size handed on
				            // for box scaling is the size the canvas really has.
				            const displayWidth = Math.round(img.width * scale);
				            const displayHeight = Math.max(1, Math.round(img.height * scale));

				            // Size both canvases. Assigning width/height also resets their
				            // content, so no explicit clearRect is needed afterwards.
				            canvasOriginal.width = displayWidth;
				            canvasOriginal.height = displayHeight;
				            canvasTextOnly.width = displayWidth;
				            canvasTextOnly.height = displayHeight;

				            // Draw the source image.
				            ctxOriginal.drawImage(img, 0, 0, displayWidth, displayHeight);

				            // Reveal the canvas container and the loading overlay.
				            canvasContainer.style.display = "flex";
				            loadingOverlay.style.display = "flex";

				            // Strip the "data:...;base64," prefix and keep only the payload.
				            const base64Image = dataUrl.split(',')[1];
				            sendToOCR(base64Image, img.width, img.height, displayWidth, displayHeight);
				        };
				        img.src = dataUrl;
				    };
				    reader.readAsDataURL(file);
				}
			
 
				+
			
 
				/**
				 * POST a base64-encoded image to the /ocr endpoint and draw the returned
				 * results. The loading overlay is hidden on both success and failure.
				 *
				 * @param {string} base64Image - Base64 payload of the image (no data-URL prefix).
				 * @param {number} originalWidth - Width of the source image in pixels.
				 * @param {number} originalHeight - Height of the source image in pixels.
				 * @param {number} displayWidth - Width the image is displayed at.
				 * @param {number} displayHeight - Height the image is displayed at.
				 * @returns {Promise<void>} Settles once the response has been handled; never rejects.
				 */
				function sendToOCR(base64Image, originalWidth, originalHeight, displayWidth, displayHeight) {
				    const hideLoading = () => {
				        loadingOverlay.style.display = "none";
				    };

				    return fetch('/ocr', {
				        method: 'POST',
				        headers: {
				            'Content-Type': 'application/json'
				        },
				        body: JSON.stringify({ image: base64Image })
				    })
				        .then(res => {
				            // fetch only rejects on network errors; HTTP error statuses must be checked explicitly.
				            if (!res.ok) {
				                throw new Error(`OCR request failed with HTTP status ${res.status}`);
				            }
				            return res.json();
				        })
				        .then(data => {
				            // Fail with a clear message instead of a TypeError deep inside the drawing code.
				            if (!data || !Array.isArray(data.results)) {
				                throw new Error('OCR response does not contain a "results" array');
				            }
				            drawBoxesAndTextWithOrientation(data.results, originalWidth, originalHeight, displayWidth, displayHeight);
				            hideLoading();
				        })
				        .catch(err => {
				            console.error("OCR 调用失败", err);
				            alert("OCR 识别失败，请查看控制台日志");
				            hideLoading();
				        });
				}
			
 
				+
			
 
				/**
				 * Render OCR results: recognized text on the text-only canvas and a red
				 * rectangle around each detected region on the original-image canvas.
				 *
				 * Box coordinates are scaled from source-image pixels to display pixels.
				 * The rectangle is the axis-aligned bounding box of all points of the
				 * detection polygon, so the outcome does not depend on the order in which
				 * the corners are listed.
				 *
				 * @param {Array<{bounding_box: number[][], text: string}>} results - OCR entries;
				 *     a non-array value is treated as "no results".
				 * @param {number} originalWidth - Width of the source image in pixels.
				 * @param {number} originalHeight - Height of the source image in pixels.
				 * @param {number} displayWidth - Width of the canvases.
				 * @param {number} displayHeight - Height of the canvases.
				 */
				function drawBoxesAndTextWithOrientation(results, originalWidth, originalHeight, displayWidth, displayHeight) {
				    ctxTextOnly.clearRect(0, 0, canvasTextOnly.width, canvasTextOnly.height);
				    ctxTextOnly.font = "12px sans-serif";
				    ctxTextOnly.fillStyle = "black";

				    if (!Array.isArray(results)) {
				        return;
				    }

				    const scaleX = displayWidth / originalWidth;
				    const scaleY = displayHeight / originalHeight;

				    // Style of the red outline drawn on the original image.
				    ctxOriginal.strokeStyle = "red";
				    ctxOriginal.lineWidth = 1;

				    results.forEach(result => {
				        const points = result && result.bounding_box;
				        if (!Array.isArray(points) || points.length === 0) {
				            return; // skip malformed entries instead of aborting the whole render
				        }

				        const xs = points.map(([x]) => x * scaleX);
				        const ys = points.map(([, y]) => y * scaleY);
				        const left = Math.min(...xs);
				        const top = Math.min(...ys);
				        const width = Math.max(...xs) - left;
				        const height = Math.max(...ys) - top;

				        const decodedText = decodeUnicode(result.text == null ? '' : String(result.text));

				        // Orientation heuristic: narrower than tall means vertical text.
				        if (width < height && height > 10) {
				            drawVerticalText(ctxTextOnly, decodedText, left, top, height);
				        } else {
				            ctxTextOnly.fillText(decodedText, left, top + height / 2);
				        }

				        ctxOriginal.strokeRect(left, top, width, height);
				    });
				}
			
 
				+
			
 
				/**
				 * Draw text top-to-bottom, one character per row, 14 pixels apart.
				 *
				 * Characters are taken by Unicode code point, so characters outside the
				 * Basic Multilingual Plane (stored as surrogate pairs) are drawn as one
				 * glyph instead of two broken halves.
				 *
				 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
				 * @param {string} text - Text to draw; null/undefined draws nothing.
				 * @param {number} x - X coordinate of every character.
				 * @param {number} y - Y coordinate of the first character.
				 * @param {number} height - Height of the text box; kept for call-site
				 *     compatibility, not used for layout.
				 */
				function drawVerticalText(ctx, text, x, y, height) {
				    const glyphs = Array.from(text == null ? '' : String(text));
				    glyphs.forEach((glyph, row) => {
				        ctx.fillText(glyph, x, y + row * 14);
				    });
				}
			
 
				+
			
 
				/**
				 * Replace literal "\uXXXX" escape sequences (a backslash, "u" and four hex
				 * digits) with the characters they denote.
				 *
				 * @param {*} str - Text to decode. null/undefined yields an empty string;
				 *     other non-string values are converted with String() first.
				 * @returns {string} The decoded text.
				 */
				function decodeUnicode(str) {
				    if (str == null) {
				        return '';
				    }
				    return String(str).replace(/\\u([0-9a-fA-F]{4})/g, function (_, hex) {
				        return String.fromCharCode(parseInt(hex, 16));
				    });
				}
			
 
				+</script>
			
 
				+
			
 
				+</body>
			
 
				+</html>
			
--- a/python/onnxocr/templates/webui.html
+++ b/python/onnxocr/templates/webui.html
@@ -0,0 +1,245 @@
 
				+<!DOCTYPE html>
			
 
				+<html lang="zh-CN">
			
 
				+<head>
			
 
				+    <meta charset="UTF-8" />
			
 
				+    <title>OnnxOCR Web UI</title>
			
 
				+    <link rel="stylesheet" href="/static/webui.css">
			
 
				+</head>
			
 
				+<body>
			
 
				+<div class="container">
			
 
				+    <h2>OnnxOCR Web UI</h2>
			
 
				+    <form id="ocrForm">
			
 
				+        <div class="form-row" style="justify-content: flex-start;">
			
 
				+            <label for="modelSelect">选择模型：</label>
			
 
				+            <select id="modelSelect" name="model_name">
			
 
				+                <option value="PP-OCRv5">PP-OCRv5</option>
			
 
				+                <option value="PP-OCRv4">PP-OCRv4</option>
			
 
				+                <option value="ch_ppocr_server_v2.0">ch_ppocr_server_v2.0</option>
			
 
				+            </select>
			
 
				+            <button type="button" id="clearBtn">清除</button>
			
 
				+        </div>
			
 
				+        <div class="form-row">
			
 
				+            <div id="dropZone" class="drop-zone">点击或拖拽图片/PDF文件到此处（可多选）</div>
			
 
				+            <input id="fileInput" type="file" name="files" multiple accept="image/*,.pdf" />
			
 
				+        </div>
			
 
				+        <div class="form-row">
			
 
				+            <ul id="fileList"></ul>
			
 
				+        </div>
			
 
				+        <div class="form-row button-row">
			
 
				+            <button type="submit">开始识别</button>
			
 
				+        </div>
			
 
				+    </form>
			
 
				+    <div id="previewArea"></div>
			
 
				+    <div id="progressArea">
			
 
				+        <div id="progressText"></div>
			
 
				+        <div class="progress-bar-bg"><div class="progress-bar" id="progressBar"></div></div>
			
 
				+        <div id="elapsedTime"></div>
			
 
				+    </div>
			
 
				+    <div class="download-row">
			
 
				+        <button id="downloadBtn">下载全部TXT（压缩包）</button>
			
 
				+    </div>
			
 
				+    <div id="loading">正在识别，请稍候...</div>
			
 
				+    <div id="resultArea"></div>
			
 
				+</div>
			
 
				+<div id="globalTip" style="display:none;position:fixed;top:32px;right:32px;z-index:9999;min-width:120px;padding:12px 28px;background:#7b8cff;color:#fff;border-radius:8px;box-shadow:0 2px 8px #7b8cff22;font-size:1.08rem;transition:opacity 0.3s;opacity:0;"></div>
			
 
				+<script>
			
 
				+const dropZone = document.getElementById('dropZone');
			
 
				+const fileInput = document.getElementById('fileInput');
			
 
				+const ocrForm = document.getElementById('ocrForm');
			
 
				+const resultArea = document.getElementById('resultArea');
			
 
				+const downloadBtn = document.getElementById('downloadBtn');
			
 
				+const loading = document.getElementById('loading');
			
 
				+const fileList = document.getElementById('fileList');
			
 
				+const clearBtn = document.getElementById('clearBtn');
			
 
				+const progressArea = document.getElementById('progressArea');
			
 
				+const progressBar = document.getElementById('progressBar');
			
 
				+const progressText = document.getElementById('progressText');
			
 
				+const elapsedTime = document.getElementById('elapsedTime');
			
 
				+let lastZipUrl = null;
			
 
				+
			
 
				+function updateFileList() {
			
 
				+    fileList.innerHTML = '';
			
 
				+    if (fileInput.files.length === 0) {
			
 
				+        fileList.innerHTML = '<li style="color:#888;">未选择文件</li>';
			
 
				+        return;
			
 
				+    }
			
 
				+    for (const file of fileInput.files) {
			
 
				+        const li = document.createElement('li');
			
 
				+        li.textContent = file.name;
			
 
				+        fileList.appendChild(li);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// 拖拽上传
			
 
				+['dragenter','dragover'].forEach(evt => dropZone.addEventListener(evt, e => {
			
 
				+    e.preventDefault();
			
 
				+    dropZone.classList.add('dragover');
			
 
				+}));
			
 
				+['dragleave','drop'].forEach(evt => dropZone.addEventListener(evt, e => {
			
 
				+    e.preventDefault();
			
 
				+    dropZone.classList.remove('dragover');
			
 
				+}));
			
 
				+dropZone.addEventListener('click', () => fileInput.click());
			
 
				+dropZone.addEventListener('drop', e => {
			
 
				+    fileInput.files = e.dataTransfer.files;
			
 
				+    updateFileList();
			
 
				+});
			
 
				+fileInput.addEventListener('change', updateFileList);
			
 
				+
			
 
				+ocrForm.addEventListener('submit', async function(e) {
			
 
				+    e.preventDefault();
			
 
				+    if (!fileInput.files.length) {
			
 
				+        alert('请先选择图片或PDF文件');
			
 
				+        return;
			
 
				+    }
			
 
				+    loading.style.display = 'none'; // 立即隐藏 loading
			
 
				+    resultArea.innerHTML = '';
			
 
				+    downloadBtn.style.display = 'none';
			
 
				+    fileList.innerHTML = '';
			
 
				+    progressArea.style.display = 'flex';
			
 
				+    progressBar.style.width = '0%';
			
 
				+    progressText.textContent = '正在准备识别...';
			
 
				+    elapsedTime.textContent = '';
			
 
				+    const startTime = Date.now();
			
 
				+    const formData = new FormData();
			
 
				+    for (const file of fileInput.files) {
			
 
				+        formData.append('files', file);
			
 
				+    }
			
 
				+    formData.append('model_name', document.getElementById('modelSelect').value);
			
 
				+    // 保存所有图片文件的base名和File对象
			
 
				+    const imageFileMap = {};
			
 
				+    for (const file of fileInput.files) {
			
 
				+        if (file.type.startsWith('image/')) {
			
 
				+            const base = file.name.replace(/\.[^.]+$/, '');
			
 
				+            imageFileMap[base] = file;
			
 
				+        }
			
 
				+    }
			
 
				+    try {
			
 
				+        let fakeProgress = 0;
			
 
				+        const fakeTimer = setInterval(() => {
			
 
				+            if (fakeProgress < 80) {
			
 
				+                fakeProgress += Math.random() * 8 + 2;
			
 
				+                progressBar.style.width = Math.min(fakeProgress, 80) + '%';
			
 
				+                progressText.textContent = '正在识别文件...';
			
 
				+            }
			
 
				+        }, 300);
			
 
				+        const resp = await fetch('/ocr', { method: 'POST', body: formData });
			
 
				+        clearInterval(fakeTimer);
			
 
				+        progressBar.style.width = '100%';
			
 
				+        progressText.textContent = '识别完成';
			
 
				+        const data = await resp.json();
			
 
				+        if (!data.success) {
			
 
				+            progressArea.style.display = 'none';
			
 
				+            resultArea.innerHTML = `<div style='color:red;'>识别失败：${data.msg || ''}</div>`;
			
 
				+            lastZipUrl = null;
			
 
				+            return;
			
 
				+        }
			
 
				+        // 多图片多结果展示
			
 
				+        previewArea.innerHTML = '';
			
 
				+        resultArea.innerHTML = '';
			
 
				+        // 构建图片base名到File对象的映射
			
 
				+        const imgMap = {};
			
 
				+        for (const file of fileInput.files) {
			
 
				+            if (file.type.startsWith('image/')) {
			
 
				+                const base = file.name.replace(/\.[^.]+$/, '');
			
 
				+                imgMap[base] = file;
			
 
				+            }
			
 
				+        }
			
 
				+        data.results.forEach(r => {
			
 
				+            const txtBase = r.filename.replace(/\.txt$/, '');
			
 
				+            const div = document.createElement('div');
			
 
				+            div.className = 'result-block';
			
 
				+            let imgHtml = '';
			
 
				+            if (imgMap[txtBase]) {
			
 
				+                const url = URL.createObjectURL(imgMap[txtBase]);
			
 
				+                imgHtml = `<img class=\"ocr-image-preview\" src=\"${url}\" alt=\"${r.filename}\">`;
			
 
				+            }
			
 
				+            // 新增复制按钮
			
 
				+            const copyBtnHtml = `<button class=\"copy-btn\" title=\"复制文本\"><svg width=\"18\" height=\"18\" viewBox=\"0 0 20 20\" fill=\"none\" xmlns=\"http://www.w3.org/2000/svg\"><rect x=\"5\" y=\"5\" width=\"10\" height=\"12\" rx=\"2\" fill=\"#7b8cff\"/><rect x=\"3\" y=\"3\" width=\"10\" height=\"12\" rx=\"2\" stroke=\"#7b8cff\" stroke-width=\"1.5\" fill=\"none\"/></svg></button>`;
			
 
				+            div.innerHTML = `${imgHtml}<div class=\"ocr-text-content\"><div style=\"display:flex;justify-content:space-between;align-items:flex-start;\"><b>${r.filename}</b>${copyBtnHtml}</div><pre>${r.content}</pre></div>`;
			
 
				+            resultArea.appendChild(div);
			
 
				+        });
			
 
				+        // 事件委托绑定复制按钮点击事件
			
 
				+        resultArea.addEventListener('click', function(e) {
			
 
				+            if (e.target.closest('.copy-btn')) {
			
 
				+                const btn = e.target.closest('.copy-btn');
			
 
				+                const pre = btn.closest('.ocr-text-content').querySelector('pre');
			
 
				+                if (pre) {
			
 
				+                    navigator.clipboard.writeText(pre.textContent).then(() => {
			
 
				+                        showTip('复制成功');
			
 
				+                    }).catch(() => {
			
 
				+                        showTip('复制失败，请手动复制', '#f87171');
			
 
				+                    });
			
 
				+                }
			
 
				+            }
			
 
				+        });
			
 
				+        const used = ((Date.now() - startTime) / 1000).toFixed(2);
			
 
				+        elapsedTime.textContent = `识别总耗时：${used} 秒`;
			
 
				+        downloadBtn.style.display = 'inline-block';
			
 
				+        lastZipUrl = data.zip_url || null;
			
 
				+    } catch (err) {
			
 
				+        progressArea.style.display = 'none';
			
 
				+        resultArea.innerHTML = `<div style='color:red;'>请求失败：${err}</div>`;
			
 
				+    }
			
 
				+});
			
 
				+
			
 
				+downloadBtn.addEventListener('click', function() {
			
 
				+    if (lastZipUrl) {
			
 
				+        // 采用 a 标签下载，兼容所有浏览器
			
 
				+        const a = document.createElement('a');
			
 
				+        a.href = lastZipUrl;
			
 
				+        a.download = '';
			
 
				+        document.body.appendChild(a);
			
 
				+        a.click();
			
 
				+        document.body.removeChild(a);
			
 
				+    } else {
			
 
				+        alert('未找到可下载的压缩包');
			
 
				+    }
			
 
				+});
			
 
				+
			
 
				+clearBtn.addEventListener('click', function() {
			
 
				+    fileInput.value = '';
			
 
				+    updateFileList();
			
 
				+    resultArea.innerHTML = '';
			
 
				+    downloadBtn.style.display = 'none';
			
 
				+    previewArea.innerHTML = '';
			
 
				+    progressArea.style.display = 'none';
			
 
				+    progressBar.style.width = '0%';
			
 
				+    progressText.textContent = '';
			
 
				+    elapsedTime.textContent = '';
			
 
				+});
			
 
				+
			
 
				+function showTip(msg, color = '#7b8cff') {
			
 
				+    const tip = document.getElementById('globalTip');
			
 
				+    tip.textContent = msg;
			
 
				+    tip.style.background = color;
			
 
				+    tip.style.display = 'block';
			
 
				+    tip.style.opacity = '1';
			
 
				+    clearTimeout(tip._timer);
			
 
				+    tip._timer = setTimeout(() => {
			
 
				+        tip.style.opacity = '0';
			
 
				+        setTimeout(() => { tip.style.display = 'none'; }, 400);
			
 
				+    }, 1500);
			
 
				+}
			
 
				+
			
 
				+// 复制按钮事件绑定
			
 
				+setTimeout(() => {
			
 
				+    document.querySelectorAll('.copy-btn').forEach(btn => {
			
 
				+        btn.onclick = function(e) {
			
 
				+            const pre = btn.closest('.ocr-text-content').querySelector('pre');
			
 
				+            if (pre) {
			
 
				+                navigator.clipboard.writeText(pre.textContent).then(() => {
			
 
				+                    btn.title = '已复制!';
			
 
				+                    btn.style.background = '#e0eaff';
			
 
				+                    setTimeout(() => {
			
 
				+                        btn.title = '复制文本';
			
 
				+                        btn.style.background = '';
			
 
				+                    }, 1200);
			
 
				+                });
			
 
				+            }
			
 
				+        };
			
 
				+    });
			
 
				+}, 100);
			
 
				+</script>
			
 
				+</body>
			
 
				+</html>
			
--- a/python/onnxocr/test_ocr.py
+++ b/python/onnxocr/test_ocr.py
@@ -0,0 +1,21 @@
 
				+import cv2
			
 
				+import time
			
 
				+from onnxocr.onnx_paddleocr import ONNXPaddleOcr,sav2Img
			
 
				+import sys
			
 
				+import time
			
 
				+#固定到onnx路径·
			
 
				+# sys.path.append('./paddle_to_onnx/onnx')
			
 
				+
			
 
				+model = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)
			
 
				+
			
 
				+
			
 
				+img = cv2.imread('./onnxocr/test_images/715873facf064583b44ef28295126fa7.jpg')
			
 
				+s = time.time()
			
 
				+result = model.ocr(img)
			
 
				+e = time.time()
			
 
				+print("total time: {:.3f}".format(e - s))
			
 
				+print("result:", result)
			
 
				+for box in result[0]:
			
 
				+    print(box)
			
 
				+
			
 
				+sav2Img(img, result,name=str(time.time())+'.jpg')
			
--- a/python/onnxocr/webui.py
+++ b/python/onnxocr/webui.py
@@ -0,0 +1,135 @@
 
				+import os
			
 
				+import time
			
 
				+import zipfile
			
 
				+from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for
			
 
				+from werkzeug.utils import secure_filename
			
 
				+from onnxocr.ocr_images_pdfs import OCRLogic
			
 
				+import cv2
			
 
				+import base64
			
 
				+import numpy as np
			
 
				+from onnxocr.onnx_paddleocr import ONNXPaddleOcr
			
 
				+
			
 
				+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
			
 
				+UPLOAD_ROOT = os.path.join(BASE_DIR, "uploads")
			
 
				+RESULT_ROOT = os.path.join(BASE_DIR, "results")
			
 
				+os.makedirs(UPLOAD_ROOT, exist_ok=True)
			
 
				+os.makedirs(RESULT_ROOT, exist_ok=True)
			
 
				+
			
 
				+MODEL_OPTIONS = ["PP-OCRv5", "PP-OCRv4", "ch_ppocr_server_v2.0"]
			
 
				+
			
 
				+app = Flask(__name__, static_folder="static", template_folder="templates")
			
 
				+app.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024  # 200MB
			
 
				+
			
 
				+ocr_logic = OCRLogic(lambda msg: print(msg))
			
 
				+# 独立 OCR 模型实例，避免影响 ocr_logic
			
 
				+ocr_model_api = ONNXPaddleOcr(use_angle_cls=True, use_gpu=False)
			
 
				+
			
 
				+@app.route("/")
			
 
				+def index():
			
 
				+    return render_template("webui.html", model_options=MODEL_OPTIONS)
			
 
				+
			
 
				+@app.errorhandler(404)
			
 
				+def not_found(e):
			
 
				+    path = request.path
			
 
				+    if not path.startswith("/static") and not path.startswith("/download"):
			
 
				+        return redirect(url_for("index"))
			
 
				+    return jsonify({"detail": "NotFound"}), 404
			
 
				+
			
 
				+@app.route("/set_model", methods=["POST"])
			
 
				+def set_model():
			
 
				+    model_name = request.form.get("model_name")
			
 
				+    try:
			
 
				+        ocr_logic.set_model(model_name)
			
 
				+        return {"success": True, "msg": f"模型已切换为 {model_name}"}
			
 
				+    except Exception as e:
			
 
				+        return {"success": False, "msg": str(e)}
			
 
				+
			
 
				+@app.route("/ocr", methods=["POST"])
			
 
				+def ocr_files():
			
 
				+    files = request.files.getlist("files")
			
 
				+    model_name = request.form.get("model_name")
			
 
				+    if not files or not model_name:
			
 
				+        return jsonify({"success": False, "msg": "缺少文件或模型参数"}), 400
			
 
				+    try:
			
 
				+        ocr_logic.set_model(model_name)
			
 
				+    except Exception as e:
			
 
				+        return jsonify({"success": False, "msg": f"模型切换失败: {e}"}), 500
			
 
				+    timestamp = time.strftime("%Y%m%d_%H%M%S")
			
 
				+    session_dir = os.path.join(RESULT_ROOT, timestamp)
			
 
				+    os.makedirs(session_dir, exist_ok=True)
			
 
				+    file_paths = []
			
 
				+    for file in files:
			
 
				+        filename = secure_filename(file.filename)
			
 
				+        file_path = os.path.join(session_dir, filename)
			
 
				+        file.save(file_path)
			
 
				+        file_paths.append(file_path)
			
 
				+    results = []
			
 
				+    def status_callback(msg): pass
			
 
				+    logic = OCRLogic(status_callback)
			
 
				+    logic.set_model(model_name)
			
 
				+    logic.run(file_paths, save_txt=True, merge_txt=False, output_img=False)
			
 
				+    txt_files = []
			
 
				+    for file_path in file_paths:
			
 
				+        out_dir = os.path.join(os.path.dirname(file_path), "Output_OCR")
			
 
				+        if not os.path.exists(out_dir):
			
 
				+            continue
			
 
				+        for fname in os.listdir(out_dir):
			
 
				+            if fname.endswith(".txt") and fname.startswith(os.path.splitext(os.path.basename(file_path))[0]):
			
 
				+                txt_files.append(os.path.join(out_dir, fname))
			
 
				+                with open(os.path.join(out_dir, fname), "r", encoding="utf-8") as f:
			
 
				+                    content = f.read()
			
 
				+                results.append({"filename": fname, "content": content})
			
 
				+    zip_path = os.path.join(session_dir, f"ocr_txt_{timestamp}.zip")
			
 
				+    with zipfile.ZipFile(zip_path, "w") as zipf:
			
 
				+        for txt_file in txt_files:
			
 
				+            zipf.write(txt_file, os.path.basename(txt_file))
			
 
				+    return jsonify({
			
 
				+        "success": True,
			
 
				+        "results": results,
			
 
				+        "zip_url": f"/download/{timestamp}"
			
 
				+    })
			
 
				+
			
 
				+@app.route("/download/<timestamp>")
			
 
				+def download_zip(timestamp):
			
 
				+    session_dir = os.path.join(RESULT_ROOT, timestamp)
			
 
				+    zip_path = os.path.join(session_dir, f"ocr_txt_{timestamp}.zip")
			
 
				+    if os.path.exists(zip_path):
			
 
				+        return send_file(zip_path, as_attachment=True, download_name=f"ocr_txt_{timestamp}.zip")
			
 
				+    return jsonify({"success": False, "msg": "文件不存在"}), 404
			
 
				+
			
 
				+@app.route("/ocr_api", methods=["POST"])
			
 
				+def ocr_api():
			
 
				+    data = request.get_json()
			
 
				+    if not data or "image" not in data:
			
 
				+        return jsonify({"error": "Invalid request, 'image' field is required."}), 400
			
 
				+    image_base64 = data["image"]
			
 
				+    try:
			
 
				+        image_bytes = base64.b64decode(image_base64)
			
 
				+        image_np = np.frombuffer(image_bytes, dtype=np.uint8)
			
 
				+        img = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
			
 
				+        if img is None:
			
 
				+            return jsonify({"error": "Failed to decode image from base64."}), 400
			
 
				+    except Exception as e:
			
 
				+        return jsonify({"error": f"Image decoding failed: {str(e)}"}), 400
			
 
				+    start_time = time.time()
			
 
				+    result = ocr_model_api.ocr(img)
			
 
				+    end_time = time.time()
			
 
				+    processing_time = end_time - start_time
			
 
				+    ocr_results = []
			
 
				+    for line in result[0]:
			
 
				+        if isinstance(line[0], (list, np.ndarray)):
			
 
				+            bounding_box = np.array(line[0]).reshape(4, 2).tolist()
			
 
				+        else:
			
 
				+            bounding_box = []
			
 
				+        ocr_results.append({
			
 
				+            "text": line[1][0],
			
 
				+            "confidence": float(line[1][1]),
			
 
				+            "bounding_box": bounding_box
			
 
				+        })
			
 
				+    return jsonify({
			
 
				+        "processing_time": processing_time,
			
 
				+        "results": ocr_results
			
 
				+    })
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    app.run(host="0.0.0.0", port=5005, debug=True)