// yichael / AndroidRemoteController
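/**
 * fun tag: ocr (OnnxOCR recognition)
 * 1) When `image` is an image path: run OCR on that image and write the full recognized text to the variable.
 * 2) When `image` is the text to look for: run OCR on a device screenshot, locate that text in it, and write the center-point coordinates to the variable (requires a device).
 */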

const path = require('path')
const fs = require('fs')
const os = require('os')
const { spawnSync } = require('child_process')
const { captureScreenshot } = require('../../../adb/adb-screencap.js')

// Location of configs/config.js. When STATIC_ROOT is set, the `configs`
// directory is taken next to it (inside STATIC_ROOT's parent directory);
// otherwise it is looked up four levels above this file's directory.
const configPath = path.join(
  process.env.STATIC_ROOT
    ? path.dirname(process.env.STATIC_ROOT)
    : path.join(__dirname, '..', '..', '..', '..'),
  'configs',
  'config.js',
)
// Project root = the directory that contains `configs/`.
const projectRoot = path.resolve(configPath, '..', '..')
// A missing config file is tolerated: an empty object is used instead.
const config = fs.existsSync(configPath) ? require(configPath) : {}
// Python script that is spawned to perform the OCR work.
const ocrScriptPath = path.join(projectRoot, 'python', 'scripts', 'ocr-onnx.py')

// Name of the `fun` tag implemented by this module.
const tagName = 'ocr'

// Descriptor of the tag: a human-readable description plus the meaning of its
// input and output fields. The strings are runtime data and are kept verbatim.
const schema = {
  description: 'OCR：传入图片路径则识别全文；传入要查找的文字则在设备截图中定位该文字并返回中心点坐标。',
  inputs: {
    image: '图片路径 或 要查找的文字',
    variable: '输出变量名（保存识别文本或中心点 {"x", "y"}）',
  },
  outputs: {
    variable: '识别文本 或 中心点 JSON',
  },
}

/**
 * Picks the Python executable used to launch the OCR script.
 *
 * The base directory is the first truthy value among `config.pythonPath.path`,
 * `config.pythonVenvPath` and `<projectRoot>/python/<arm64|x64>` (chosen from
 * `process.arch`). Inside it, the Windows-style locations
 * `env/Scripts/python.exe`, `Scripts/python.exe` and `py/python.exe` are
 * probed in that order.
 *
 * @returns {string} The first candidate that exists on disk, or the bare
 *   command name `'python'` when none of them does.
 */
function getPythonPath() {
  const archDir = process.arch === 'arm64' ? 'arm64' : 'x64'
  const baseDir = config.pythonPath?.path || config.pythonVenvPath || path.join(projectRoot, 'python', archDir)
  // Ordered by priority; the first existing file wins.
  const candidates = [
    ['env', 'Scripts', 'python.exe'],
    ['Scripts', 'python.exe'],
    ['py', 'python.exe'],
  ].map((segments) => path.join(baseDir, ...segments))
  return candidates.find((candidate) => fs.existsSync(candidate)) ?? 'python'
}

/**
 * Runs OnnxOCR on an image file and returns the recognized text.
 *
 * How `imagePath` is resolved:
 * - starts with "/" or contains ":"  -> used as is (treated as absolute / drive path);
 * - contains "/" or the platform path separator -> joined onto the base directory;
 * - bare file name -> looked up in `<base directory>/resources`.
 * The base directory is `folderPath` when it is a non-empty string, otherwise
 * the project root.
 *
 * Failures (bad input, missing files, process errors, unparsable output) are
 * reported through `{ success: false, error }`. The OCR script is executed
 * with `spawnSync`, so the call blocks until the script exits or the 60 s
 * timeout elapses.
 *
 * @param {{ imagePath: string, folderPath?: string }} input - imagePath: image
 *   path (relative or absolute); folderPath: directory of the current flow.
 * @returns {Promise<{ success: boolean, text?: string, error?: string }>}
 */
async function executeOcr({ imagePath, folderPath }) {
  if (!imagePath || typeof imagePath !== 'string') {
    return { success: false, error: '缺少图片路径' }
  }
  const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot
  const isAbsoluteOrDrive = imagePath.startsWith('/') || imagePath.includes(':')
  const hasSubPath = imagePath.includes('/') || imagePath.includes(path.sep)
  const resolvedImage = isAbsoluteOrDrive ? imagePath : (hasSubPath ? path.join(baseDir, imagePath) : path.join(baseDir, 'resources', imagePath))

  if (!fs.existsSync(ocrScriptPath)) {
    return { success: false, error: `OCR 脚本不存在: ${ocrScriptPath}` }
  }
  if (!fs.existsSync(resolvedImage)) {
    return { success: false, error: `图片不存在: ${resolvedImage}` }
  }

  const pythonPath = getPythonPath()
  const r = spawnSync(pythonPath, [ocrScriptPath, '--image', resolvedImage, '--project-root', projectRoot], {
    encoding: 'utf-8',
    timeout: 60000,
    env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
    cwd: projectRoot,
  })

  const outStr = (r.stdout || '').trim()
  const errStr = (r.stderr || '').trim()
  // spawnSync reports launch failures (e.g. interpreter not found) and
  // timeouts through `error`; surface that cause instead of a generic message.
  if (r.error) {
    const detail = errStr ? `${r.error.message}\n${errStr}` : r.error.message
    return { success: false, error: `OCR 执行失败: ${detail}` }
  }
  if (r.status !== 0) {
    return { success: false, error: errStr || outStr || 'OCR 执行失败' }
  }
  let out
  try {
    out = JSON.parse(outStr)
  } catch (e) {
    return { success: false, error: `OCR 输出解析失败: ${outStr}` }
  }
  // Valid JSON is not necessarily an object (e.g. `null`); guard before
  // reading properties so such output is reported instead of throwing.
  if (out === null || typeof out !== 'object' || !out.success) {
    return { success: false, error: out?.error || 'OCR 识别失败' }
  }
  return { success: true, text: out.text != null ? String(out.text) : '' }
}

/**
 * Takes a screenshot of the device, runs OnnxOCR on it and returns the center
 * point of the region that contains `findText`.
 *
 * The screenshot is written to a temporary file in the OS temp directory and
 * removed again before the function returns, whatever the outcome. Failures
 * detected here (bad input, missing script, empty screenshot, process errors,
 * unparsable output, text not found) are reported through
 * `{ success: false, error }`; an exception thrown by `captureScreenshot`
 * itself is not caught and rejects the returned promise. The OCR script is
 * executed with `spawnSync`, so the call blocks until the script exits or the
 * 60 s timeout elapses.
 *
 * @param {{ device: string, findText: string, folderPath?: string }} input -
 *   device: device ID used for the screenshot; findText: text to locate
 *   (surrounding whitespace is ignored); folderPath: accepted but not used here.
 * @returns {Promise<{ success: boolean, center?: { x: number, y: number }, error?: string }>}
 */
async function executeOcrFindText({ device, findText, folderPath }) {
  if (!device) return { success: false, error: '缺少设备 ID，无法截图' }
  // Trim first so a whitespace-only needle is rejected before any work is done.
  const needle = typeof findText === 'string' ? findText.trim() : ''
  if (!needle) return { success: false, error: '缺少要查找的文字' }
  // Same pre-flight check as executeOcr: fail early with a clear message
  // instead of taking a screenshot that cannot be processed.
  if (!fs.existsSync(ocrScriptPath)) {
    return { success: false, error: `OCR 脚本不存在: ${ocrScriptPath}` }
  }
  const ts = Date.now()
  const screenshotPath = path.join(os.tmpdir(), `ef-ocr-screenshot-${ts}.png`)
  try {
    captureScreenshot(device, screenshotPath)
    if (!fs.existsSync(screenshotPath) || fs.statSync(screenshotPath).size === 0) {
      return { success: false, error: '设备截图失败或为空' }
    }
    const pythonPath = getPythonPath()
    const r = spawnSync(pythonPath, [ocrScriptPath, '--image', screenshotPath, '--find-text', needle, '--project-root', projectRoot], {
      encoding: 'utf-8',
      timeout: 60000,
      env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
      cwd: projectRoot,
    })
    const outStr = (r.stdout || '').trim()
    const errStr = (r.stderr || '').trim()
    // spawnSync reports launch failures (e.g. interpreter not found) and
    // timeouts through `error`; surface that cause instead of a generic message.
    if (r.error) {
      const detail = errStr ? `${r.error.message}\n${errStr}` : r.error.message
      return { success: false, error: `OCR 查找文字失败: ${detail}` }
    }
    if (r.status !== 0) {
      return { success: false, error: errStr || outStr || 'OCR 查找文字失败' }
    }
    let out
    try {
      out = JSON.parse(outStr)
    } catch (e) {
      return { success: false, error: `OCR 输出解析失败: ${outStr}` }
    }
    // Valid JSON is not necessarily an object (e.g. `null`); guard before
    // reading properties so such output is reported instead of throwing.
    if (out === null || typeof out !== 'object' || !out.success || out.x == null || out.y == null) {
      return { success: false, error: out?.error || '图中未找到该文字' }
    }
    return { success: true, center: { x: out.x, y: out.y } }
  } finally {
    // Best-effort cleanup: the file may not exist if the capture failed.
    try { fs.unlinkSync(screenshotPath) } catch (_) {}
  }
}

// Exported API: the tag name, its schema descriptor, and the two OCR entry points.
module.exports = { tagName, schema, executeOcr, executeOcrFindText }