/**
 * fun tag: ocr (OnnxOCR recognition)
 * 1) When `image` is an image path: run OCR on that image and write the full recognized text to the variable.
 * 2) When `image` is a text to look for: run OCR on a device screenshot, locate that text in the picture
 *    and write the coordinates of its center point to the variable (a device is required).
 *
 * This module exposes the two operations separately (`executeOcr`, `executeOcrFindText`);
 * it does not itself decide which one applies to a given `image` value.
 */
const path = require('path')
const fs = require('fs')
const os = require('os')
const { spawnSync } = require('child_process')
const { captureScreenshot } = require('../../../adb/adb-screencap.js')

// config.js is looked up in a `configs` directory next to STATIC_ROOT when that env var is set,
// otherwise in a `configs` directory four levels above this file's directory.
const configPath = process.env.STATIC_ROOT
  ? path.join(path.dirname(process.env.STATIC_ROOT), 'configs', 'config.js')
  : path.join(__dirname, '..', '..', '..', '..', 'configs', 'config.js')
// The project root is the parent of the `configs` directory.
const projectRoot = path.dirname(path.dirname(path.resolve(configPath)))
const config = fs.existsSync(configPath) ? require(configPath) : {}
const ocrScriptPath = path.join(projectRoot, 'python', 'scripts', 'ocr-onnx.py')

// Upper bound for a single run of the OCR script, in milliseconds.
const OCR_TIMEOUT_MS = 60000

const tagName = 'ocr'

const schema = {
  description: 'OCR:传入图片路径则识别全文;传入要查找的文字则在设备截图中定位该文字并返回中心点坐标。',
  inputs: {
    image: '图片路径 或 要查找的文字',
    variable: '输出变量名(保存识别文本或中心点 {"x", "y"})',
  },
  outputs: {
    variable: '识别文本 或 中心点 JSON',
  },
}

/**
 * Pick the Python interpreter used to run the OCR script.
 *
 * The base directory comes from `config.pythonPath.path`, then `config.pythonVenvPath`,
 * then `<projectRoot>/python/<arm64|x64>`. Inside it the first existing candidate of
 * `env/Scripts/python.exe`, `Scripts/python.exe`, `py/python.exe` wins.
 *
 * @returns {string} Path of the interpreter, or the bare command `python` when no candidate exists.
 */
function getPythonPath() {
  const base = config.pythonPath?.path
    || config.pythonVenvPath
    || path.join(projectRoot, 'python', process.arch === 'arm64' ? 'arm64' : 'x64')
  const candidates = [
    path.join(base, 'env', 'Scripts', 'python.exe'),
    path.join(base, 'Scripts', 'python.exe'),
    path.join(base, 'py', 'python.exe'),
  ]
  return candidates.find((candidate) => fs.existsSync(candidate)) || 'python'
}

/**
 * Run the OCR script synchronously and parse the JSON object it prints on stdout.
 *
 * @param {string[]} extraArgs - Arguments placed between the script path and `--project-root`.
 * @param {string} failMessage - Fallback error text used when the process fails without any output.
 * @returns {{ ok: true, out: object } | { ok: false, error: string }}
 */
function runOcrScript(extraArgs, failMessage) {
  const r = spawnSync(getPythonPath(), [ocrScriptPath, ...extraArgs, '--project-root', projectRoot], {
    encoding: 'utf-8',
    timeout: OCR_TIMEOUT_MS,
    env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
    cwd: projectRoot,
  })
  const outStr = (r.stdout || '').trim()
  const errStr = (r.stderr || '').trim()

  // `r.error` is set when the child could not be spawned or was killed by the timeout;
  // surface that cause instead of hiding it behind the generic fallback message.
  if (r.error) {
    const detail = errStr ? `${r.error.message}; ${errStr}` : r.error.message
    return { ok: false, error: `${failMessage}: ${detail}` }
  }
  if (r.status !== 0) {
    return { ok: false, error: errStr || outStr || failMessage }
  }

  let out
  try {
    out = JSON.parse(outStr)
  } catch (e) {
    return { ok: false, error: `OCR 输出解析失败: ${outStr}` }
  }
  // Valid JSON that is not an object (e.g. `null`) cannot carry a result; treat it as unparsable
  // rather than letting a property access on it throw.
  if (out === null || typeof out !== 'object') {
    return { ok: false, error: `OCR 输出解析失败: ${outStr}` }
  }
  return { ok: true, out }
}

/**
 * Run OnnxOCR recognition on the given image.
 *
 * Path resolution: absolute paths (and anything containing `:`) are used as-is; relative paths
 * containing a separator are joined to the base directory; bare file names are looked up in
 * `<base>/resources`. The base directory is `folderPath` when given, otherwise the project root.
 *
 * @param {{ imagePath: string, folderPath?: string }} input - imagePath: image path (relative or absolute),
 *   folderPath: flow directory used as base for relative paths.
 * @returns {Promise<{ success: boolean, text?: string, error?: string }>}
 */
async function executeOcr({ imagePath, folderPath } = {}) {
  if (!imagePath || typeof imagePath !== 'string') {
    return { success: false, error: '缺少图片路径' }
  }

  const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot
  // `path.isAbsolute` additionally covers rooted paths without a drive letter (e.g. UNC paths on
  // Windows); the `:` check is kept from the original so drive-relative inputs stay untouched.
  const isAbsoluteOrDrive = path.isAbsolute(imagePath) || imagePath.includes(':')
  const hasSubPath = imagePath.includes('/') || imagePath.includes(path.sep)
  let resolvedImage
  if (isAbsoluteOrDrive) {
    resolvedImage = imagePath
  } else if (hasSubPath) {
    resolvedImage = path.join(baseDir, imagePath)
  } else {
    resolvedImage = path.join(baseDir, 'resources', imagePath)
  }

  if (!fs.existsSync(ocrScriptPath)) {
    return { success: false, error: `OCR 脚本不存在: ${ocrScriptPath}` }
  }
  if (!fs.existsSync(resolvedImage)) {
    return { success: false, error: `图片不存在: ${resolvedImage}` }
  }

  const run = runOcrScript(['--image', resolvedImage], 'OCR 执行失败')
  if (!run.ok) {
    return { success: false, error: run.error }
  }
  const { out } = run
  if (!out.success) {
    return { success: false, error: out.error || 'OCR 识别失败' }
  }
  return { success: true, text: out.text != null ? String(out.text) : '' }
}

/**
 * Look for the given text in a screenshot of the device and return the center point of its region.
 *
 * The screenshot is written to a temporary file that is removed again before returning.
 * An exception thrown by `captureScreenshot` is not caught here and propagates to the caller.
 *
 * @param {{ device: string, findText: string, folderPath?: string }} input - device: device ID to capture,
 *   findText: text to locate; folderPath is accepted but not used by this function.
 * @returns {Promise<{ success: boolean, center?: { x: number, y: number }, error?: string }>}
 */
async function executeOcrFindText({ device, findText, folderPath } = {}) {
  if (!device) return { success: false, error: '缺少设备 ID,无法截图' }
  if (!findText || typeof findText !== 'string') return { success: false, error: '缺少要查找的文字' }

  // A whitespace-only query would be sent to the script as an empty string; reject it up front.
  const needle = findText.trim()
  if (!needle) return { success: false, error: '缺少要查找的文字' }

  // Same pre-flight check as executeOcr, done before spending time on a screenshot.
  if (!fs.existsSync(ocrScriptPath)) {
    return { success: false, error: `OCR 脚本不存在: ${ocrScriptPath}` }
  }

  // Timestamp alone can collide for calls within the same millisecond; add pid and a random suffix.
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`
  const screenshotPath = path.join(os.tmpdir(), `ef-ocr-screenshot-${uniqueSuffix}.png`)

  try {
    captureScreenshot(device, screenshotPath)
    if (!fs.existsSync(screenshotPath) || fs.statSync(screenshotPath).size === 0) {
      return { success: false, error: '设备截图失败或为空' }
    }

    const run = runOcrScript(['--image', screenshotPath, '--find-text', needle], 'OCR 查找文字失败')
    if (!run.ok) {
      return { success: false, error: run.error }
    }
    const { out } = run
    if (!out.success || out.x == null || out.y == null) {
      return { success: false, error: out.error || '图中未找到该文字' }
    }
    return { success: true, center: { x: out.x, y: out.y } }
  } finally {
    // Best-effort cleanup: the file may not exist if the capture failed, and a leftover
    // temp file must not turn a finished result into an error.
    try {
      fs.unlinkSync(screenshotPath)
    } catch (_) {}
  }
}

module.exports = { tagName, schema, executeOcr, executeOcrFindText }