/** * fun 标签:img-center-point-location * 仅需 inVars[0]:模板路径。 * * 【当前逻辑】tmp → ADB 截图 + 模板复制 → VLM img2text(主模型 + 最多 2 个备用模型,**共≤3 次**; * 仅在「请求失败」或「无有效中心点坐标」时换模型重试)→ 归一化转像素 → * 可选:Python 在截图上画**绿色圆圈**标出中心点,保存为 `screenshot_center_marked.png`(同一次 `img-center-时间戳` 目录)。 * * 【已注释保留】原流程:VLM(ROI + template_crop / need_center_crop / template_scale)→ 外扩 roi → * Python 预处理 → NCC(img-center-orb-akaze.py)。恢复时取消下方对应块注释并改回 runPipeline 即可。 * * 密钥与网关:优先根目录 config.js(openaiApiKey 等同步到环境变量),否则 nodejs/ai/config.js;模型常量见文件顶部。 */ // --------------------------------------------------------------------------- // 配置:模型与超时(优先改这里或对应环境变量 / 根目录 config.js) // --------------------------------------------------------------------------- /** * 多模态模型 id(**仅请求一次** img2text):在此直接写字符串;写 '' 则依次用 * process.env.IMG_CENTER_OPENAI_MODEL、config.imgCenterOpenAiModel、nodejs/ai 默认链。 */ // const IMG_CENTER_OPENAI_MODEL = 'gemini-3.1-pro-preview' const IMG_CENTER_OPENAI_MODEL = 'gpt-5.4' /** * 中心点 VLM:主模型请求失败或 JSON 无法解析出有效 center_rx/center_ry 时,依次用备用 1、备用 2 再请求。 * 链为 [主模型, 备用1, 备用2] 去重后取前 3 个,**最多 3 次** img2text。 * 写 '' 表示该档跳过;环境变量 IMG_CENTER_FALLBACK_MODEL_1 / IMG_CENTER_FALLBACK_MODEL_2 优先于常量。 * * Claude 4.6 选型(多模态「对齐两点坐标」类任务,公开对比多认为 Opus 图像分析略强于 Sonnet,但本任务输出极短 JSON): * - 首选备用:**claude-sonnet-4-6**(延迟/成本更友好,视觉足够)。 * - 第二轮:**claude-opus-4-6**(能力上限略高,仍不稳再上)。 * - *-thinking:推理链更长、更慢更贵;非复杂推理时一般**不必**作默认备用。 * OpenAI 网关可改回如 gpt-4o / gpt-4.1。 */ const IMG_CENTER_FALLBACK_MODEL_1 = 'claude-opus-4-6' const IMG_CENTER_FALLBACK_MODEL_2 = 'gemini-3.1-pro-preview' /** 中心点 img2text 最多调用次数(主 + 备用,去重后截断) */ const IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS = 3 /** * 视觉 API(img2text)请求超时(毫秒) * 环境变量 IMG_CENTER_AI_TIMEOUT_MS 可覆盖 */ const IMG_CENTER_AI_TIMEOUT_MS = Math.max( 10_000, parseInt(String(process.env.IMG_CENTER_AI_TIMEOUT_MS || '').trim(), 10) || 300_000 ) /** 在截图上绘制中心点绿圈的 Python 脚本超时(毫秒);环境变量 IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS */ const IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS = Math.max( 5000, parseInt(String(process.env.IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS || '').trim(), 10) || 30_000 ) /** 【旧 NCC 流程】runPipeline 内等待 Matched.png 就绪的上限(毫秒) */ const MATCHED_PNG_MAX_WAIT_MS = 60_000 /** 【旧 NCC 流程】Python NCC 匹配脚本 spawn 超时(毫秒) */ const PYTHON_ORB_SCRIPT_TIMEOUT_MS = 120_000 /** * 【旧 NCC 流程】NCC 最低分:传给 Python(环境变量 IMG_CENTER_NCC_MIN_SCORE)。 */ const IMG_CENTER_NCC_MIN_SCORE_DEFAULT = 0.34 /** 【旧预处理】模板预处理脚本 spawn 超时(毫秒);环境变量 IMG_CENTER_PREPROCESS_TIMEOUT_MS 可覆盖 */ const PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS = Math.max( 5000, parseInt(String(process.env.IMG_CENTER_PREPROCESS_TIMEOUT_MS || '').trim(), 10) || 60_000 ) /** * 【旧 VLM ROI】外扩比例,环境变量 IMG_CENTER_ROI_PAD 可覆盖 */ const IMG_CENTER_ROI_PAD = Math.max( 0, Math.min( 0.12, parseFloat(String(process.env.IMG_CENTER_ROI_PAD || '').trim()) || 0.03 ) ) /** * 【旧 VLM ROI】归一化高度下限;IMG_CENTER_ROI_MIN_REL_H,默认 0.15 */ const IMG_CENTER_ROI_MIN_REL_H = Math.max( 0.06, Math.min( 0.45, parseFloat(String(process.env.IMG_CENTER_ROI_MIN_REL_H || '').trim()) || 0.15 ) ) /** * 【旧 VLM ROI】归一化宽度下限(0 表示不强制);IMG_CENTER_ROI_MIN_REL_W */ const IMG_CENTER_ROI_MIN_REL_W = (() => { const v = parseFloat(String(process.env.IMG_CENTER_ROI_MIN_REL_W || '').trim()) if (!Number.isFinite(v) || v <= 0) return 0 return Math.max(0.06, Math.min(0.95, v)) })() // --------------------------------------------------------------------------- // 依赖与工程路径 // --------------------------------------------------------------------------- const path = require('path') const fs = require('fs') const { spawnSync } = require('child_process') const { getPythonExeFromConfig } = require('../../../../python-exe-from-config.js') const configPath = process.env.STATIC_ROOT ? path.join(path.dirname(path.resolve(process.env.STATIC_ROOT)), 'config.js') : path.join(__dirname, '..', '..', '..', '..', '..', 'config.js') const config = fs.existsSync(configPath) ? require(configPath) : {} const projectRoot = (config.projectRoot && fs.existsSync(config.projectRoot)) ? config.projectRoot : path.dirname(path.resolve(configPath)) /** 在加载 nodejs/ai 之前同步,使 ai/config 能读到应用级密钥与网关 */ function syncProcessEnvFromAppConfig () { const k = config.openaiApiKey || config.vlmApiKey if ( k && !String(process.env.API_KEY || '').trim() && !String(process.env.OPENAI_API_KEY || '').trim() && !String(process.env.VLM_API_KEY || '').trim() ) { process.env.API_KEY = String(k).trim() } const u = config.openaiApiUrl if ( u && !String(process.env.BASE_URL || '').trim() && !String(process.env.OPENAI_API_URL || '').trim() ) { process.env.BASE_URL = String(u).trim().replace(/\/$/, '') } } syncProcessEnvFromAppConfig() const aiRoot = path.join(__dirname, '..', '..', '..', '..', 'ai') const aiModule = require(path.join(aiRoot, 'ai.js')) const img2textRequest = require(path.join(aiRoot, 'request', 'img2text.js')) const aiPackageConfig = require(path.join(aiRoot, 'config.js')) /** 【旧 NCC / 预处理】脚本路径(当前 runPipeline 不调用;恢复旧流程时用) */ const orbScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-orb-akaze.py') const preprocessTemplateScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-preprocess-template.py') /** 在截图上标出 VLM 中心点(绿圈)的可视化脚本 */ const markCenterScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-mark-center.py') /** * 【当前】仅问中心点:图1=截图,图2=模板;返回 center_rx、center_ry ∈ [0,1](相对图1 宽高)。 */ const VLM_CENTER_ONLY_PROMPT = `你收到两张图,顺序固定: 图1:Android 手机完整截图(与 adb screencap 一致),逻辑像素宽约 W、高约 H;坐标原点在左上角,x 向右、y 向下。 图2:模板图。请在图1 中找到与图2 视觉上对应的同一区域(同一图标、缩略图格子、按钮等)。 任务:给出图2 在图1 中**匹配可见区域的几何中心点**(该区域中心,不是图2 整张文件的画布中心)。 只输出一个 JSON 对象,必须包含: - "center_rx"、"center_ry":数字,取值在 [0,1],分别为该中心点在图1 上相对宽度 W、高度 H 的归一化坐标(左缘=0,上缘=0)。 若图1 中完全无法对应图2,两个键均填 null。 禁止 markdown、禁止代码围栏、禁止 JSON 以外的任何文字。` /* * ---------- 旧 ROI + 模板几何 VLM 提示(NCC 流程用,保留勿删)---------- const VLM_USER_PROMPT = `你收到两张图,顺序固定: 图1:Android 手机竖屏完整截图,逻辑像素宽 W、高 H。 图2:模板图——要在图1画面里定位的同一内容。常见情况:图2 是 PC/自动化侧通过 ADB 推到手机上的**原始文件**(与磁盘上打开的同一像素内容),**不是**相册 App 已处理后的内部版本。图1 里若出现该图,往往是**系统相册/图库**里的**网格缩略图**:会先对原图做**缩小**,且方格多为**近似正方形**,竖图/横图常被**居中裁成方图**再显示,与图2 原始长宽比可能完全不同。 任务:在图1中找到与图2视觉上对应的那一块区域(同一控件、同一相册格里的缩略图等),给出「搜索矩形」roi_hint;并在适用时给出 template_crop、template_scale、need_center_crop,使下游能**复现相册里那一格的「裁切 + 缩放」**,用于模板匹配。 硬性规则(roi_hint): - roi_hint 的四个数必须是相对图1 的归一化坐标:rx0,ry0 为矩形左上角,rx1,ry1 为右下角,均在 [0,1],且 rx0 0 && height > 0 && (px > 1 || py > 1)) { crx = clamp01(px / width) cry = clamp01(py / height) if (crx != null && cry != null) return { center_rx: crx, center_ry: cry } } crx = clamp01(px) cry = clamp01(py) if (crx != null && cry != null) return { center_rx: crx, center_ry: cry } return null } /** 实际发往 API 的模型名:见顶部 IMG_CENTER_OPENAI_MODEL → env / config → nodejs/ai。 */ function getImgCenterModel () { const explicit = String(IMG_CENTER_OPENAI_MODEL || '').trim() || String(process.env.IMG_CENTER_OPENAI_MODEL || '').trim() || (config.imgCenterOpenAiModel && String(config.imgCenterOpenAiModel).trim()) || '' return img2textRequest.resolveImgCenterModel(explicit || undefined) } /** 第一次仅用 OpenAI 兼容网关;若配置成 doubao 则改用 nodejs/ai 的 IMG_CENTER_MODEL(避免首轮即豆包) */ function getPrimaryOpenAiImgCenterModel () { const m = getImgCenterModel() if (m && String(m).toLowerCase() === 'doubao') { const fb = String(aiPackageConfig.IMG_CENTER_MODEL || '').trim() if (fb && fb.toLowerCase() !== 'doubao') return fb return 'gpt-5.4' } return m } function resolveFallbackCenterModelId (envKey, constVal) { const e = String(process.env[envKey] || '').trim() if (e) return e return String(constVal != null ? constVal : '').trim() } /** * 中心点 img2text 模型链:[主模型, 备用1, 备用2] 去重后取前 IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS 个。 */ function getCenterPointVlmModelChain () { const primary = String(getPrimaryOpenAiImgCenterModel() || '').trim() const fb1 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1) const fb2 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2) const chain = [] const push = (id) => { const x = String(id || '').trim() if (!x) return if (!chain.includes(x)) chain.push(x) } push(primary) push(fb1) push(fb2) return chain.slice(0, IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS) } function getImgCenterAiMeta () { return { model: getImgCenterModel(), primaryOpenAiModel: getPrimaryOpenAiImgCenterModel(), centerPointVlmModelChain: getCenterPointVlmModelChain(), fallbackModel1: resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1), fallbackModel2: resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2), baseUrl: aiPackageConfig.BASE_URL, openAiKeyConfigured: !!(aiPackageConfig.API_KEY && String(aiPackageConfig.API_KEY).trim()), } } /** * 【当前】单次 img2text(指定 model);每次尝试写入 openai_raw_attempt_{i}.json。 * @returns {{ ok: boolean, data?: object, error?: string, model: string, rawResp?: object }} */ async function callOpenAiCenterPointWithModel (workDir, screenshotPath, templatePath, modelName, attemptIndex) { syncProcessEnvFromAppConfig() const openAiKey = String(aiPackageConfig.API_KEY || '').trim() const model = String(modelName || '').trim() const screenUrl = fileToDataUrlPng(screenshotPath) const tplUrl = fileToDataUrlPng(templatePath) const imageUrls = [screenUrl, tplUrl] try { if (!openAiKey) { return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', model } } if (!model) { return { ok: false, error: '模型 id 为空', model } } const result = await aiModule.run('img2text', VLM_CENTER_ONLY_PROMPT, imageUrls, { timeoutMs: IMG_CENTER_AI_TIMEOUT_MS, model, }) const resp = result.data const attemptPayload = { model, attemptIndex, httpSuccess: result.success, httpError: result.success ? null : (result.error || null), response: resp, } fs.writeFileSync( path.join(workDir, `openai_raw_attempt_${attemptIndex}.json`), JSON.stringify(attemptPayload, null, 2), 'utf8' ) if (!result.success) { return { ok: false, error: result.error || 'VLM 请求失败', model } } const content = resp?.choices?.[0]?.message?.content const parsed = parseVlmJson(content) if (!parsed || typeof parsed !== 'object') { return { ok: false, error: '无法解析模型返回为 JSON', model, rawResp: resp } } return { ok: true, data: parsed, model, rawResp: resp } } catch (e) { const msg = e && e.message ? e.message : String(e) try { fs.writeFileSync(path.join(workDir, `openai_error_attempt_${attemptIndex}.txt`), msg, 'utf8') } catch (_) {} return { ok: false, error: msg, model } } } /* * ---------- 旧 callOpenAiRoi(ROI + 模板几何,NCC 流程用,保留勿删)---------- async function callOpenAiRoi (workDir, screenshotPath, templatePath) { syncProcessEnvFromAppConfig() const openAiKey = String(aiPackageConfig.API_KEY || '').trim() const screenUrl = fileToDataUrlPng(screenshotPath) const tplUrl = fileToDataUrlPng(templatePath) const imageUrls = [screenUrl, tplUrl] const emptyFallback = { roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 }, need_center_crop: false, confidence: 0, template_scale: 1, template_crop: { cx0: 0, cy0: 0, cx1: 1, cy1: 1 }, template_vlm_preprocessed: false, } try { if (!openAiKey) { return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', fallback: emptyFallback } } const openAiModel = getPrimaryOpenAiImgCenterModel() const result = await aiModule.run('img2text', VLM_USER_PROMPT, imageUrls, { timeoutMs: IMG_CENTER_AI_TIMEOUT_MS, model: openAiModel, }) if (!result.success) { return { ok: false, error: result.error || 'VLM 请求失败', fallback: emptyFallback } } const resp = result.data const content = resp?.choices?.[0]?.message?.content const parsed = parseVlmJson(content) if (!parsed || typeof parsed !== 'object') { return { ok: false, error: '无法解析模型返回为 JSON', fallback: emptyFallback } } fs.writeFileSync(path.join(workDir, 'openai_raw.json'), JSON.stringify(resp, null, 2), 'utf8') return { ok: true, data: parsed } } catch (e) { const msg = e && e.message ? e.message : String(e) return { ok: false, error: msg, fallback: emptyFallback } } } * ---------- 旧 callOpenAiRoi 结束 ---------- */ /* * ---------- 旧 normalizeTemplateGeometry / normalizeVlmPayload / expandRoiHintNormalized / isFullScreenRoiHint(保留勿删)---------- function normalizeTemplateGeometry (obj) { const c = obj && obj.template_crop let cx0 = 0 let cy0 = 0 let cx1 = 1 let cy1 = 1 if (c && typeof c === 'object') { const n = (k, d) => { const v = Number(c[k]) return Number.isFinite(v) ? Math.max(0, Math.min(1, v)) : d } cx0 = n('cx0', 0) cy0 = n('cy0', 0) cx1 = n('cx1', 1) cy1 = n('cy1', 1) if (cx1 <= cx0) { cx0 = 0 cx1 = 1 } if (cy1 <= cy0) { cy0 = 0 cy1 = 1 } } let sc = Number(obj && obj.template_scale) if (!Number.isFinite(sc) || sc <= 0) sc = 1 sc = Math.max(0.05, Math.min(1, sc)) return { template_crop: { cx0, cy0, cx1, cy1 }, template_scale: sc, } } function normalizeVlmPayload (obj) { const rh = obj.roi_hint || {} const nums = ['rx0', 'ry0', 'rx1', 'ry1'] let bad = false for (const k of nums) { const v = rh[k] if (v === null || v === undefined) bad = true } const tg = normalizeTemplateGeometry(obj) if (bad) { return { roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 }, need_center_crop: false, confidence: 0, template_scale: tg.template_scale, template_crop: tg.template_crop, template_vlm_preprocessed: false, } } const c = Number(obj.confidence) const conf = Number.isFinite(c) ? Math.max(0, Math.min(1, c)) : 0 return { roi_hint: { rx0: Number(rh.rx0), ry0: Number(rh.ry0), rx1: Number(rh.rx1), ry1: Number(rh.ry1), }, need_center_crop: !!obj.need_center_crop, confidence: conf, template_scale: tg.template_scale, template_crop: tg.template_crop, template_vlm_preprocessed: false, } } function expandRoiHintNormalized (rh) { let rx0 = Number(rh.rx0) let ry0 = Number(rh.ry0) let rx1 = Number(rh.rx1) let ry1 = Number(rh.ry1) if (!(rx1 > rx0 && ry1 > ry0)) return rh const pad = IMG_CENTER_ROI_PAD rx0 = Math.max(0, rx0 - pad) ry0 = Math.max(0, ry0 - pad) rx1 = Math.min(1, rx1 + pad) ry1 = Math.min(1, ry1 + pad) let h = ry1 - ry0 const hMin = IMG_CENTER_ROI_MIN_REL_H if (h < hMin) { const deficit = hMin - h const bottomAnchored = ry1 >= 0.88 if (bottomAnchored) { ry0 = Math.max(0, ry0 - deficit) h = ry1 - ry0 if (h < hMin) { ry1 = Math.min(1, ry0 + hMin) h = ry1 - ry0 if (h < hMin) ry0 = Math.max(0, ry1 - hMin) } } else { const cy = (ry0 + ry1) / 2 ry0 = Math.max(0, cy - hMin / 2) ry1 = Math.min(1, ry0 + hMin) if (ry1 >= 1 - 1e-9) ry0 = Math.max(0, 1 - hMin) } } const wMin = IMG_CENTER_ROI_MIN_REL_W if (wMin > 0) { let w = rx1 - rx0 if (w < wMin) { const cx = (rx0 + rx1) / 2 rx0 = Math.max(0, cx - wMin / 2) rx1 = Math.min(1, rx0 + wMin) if (rx1 >= 1 - 1e-9) rx0 = Math.max(0, 1 - wMin) } } return { rx0, ry0, rx1, ry1 } } function isFullScreenRoiHint (rh) { return ( Math.abs(Number(rh.rx0)) < 1e-9 && Math.abs(Number(rh.ry0)) < 1e-9 && Math.abs(Number(rh.rx1) - 1) < 1e-9 && Math.abs(Number(rh.ry1) - 1) < 1e-9 ) } * ---------- 旧几何归一化结束 ---------- */ /* * ---------- 旧 getPythonPath / runTemplatePreprocess / waitUntilMatchedWritten(保留勿删)---------- function getPythonPath () { const base = config.pythonPath?.path || config.pythonVenvPath || path.join(projectRoot, 'python', process.arch === 'arm64' ? 'arm64' : 'x64') const envPy = path.join(base, 'env', 'Scripts', 'python.exe') const scriptsPy = path.join(base, 'Scripts', 'python.exe') const pyEmbedded = path.join(base, 'py', 'python.exe') if (fs.existsSync(envPy)) return envPy if (fs.existsSync(scriptsPy)) return scriptsPy if (fs.existsSync(pyEmbedded)) return pyEmbedded return 'python' } function runTemplatePreprocess (pythonPath, templateAbsPath, workDir) { if (!fs.existsSync(preprocessTemplateScriptPath)) { return { ok: false, error: `未找到 ${preprocessTemplateScriptPath}` } } const env = { ...process.env, PYTHONIOENCODING: 'utf-8' } if (process.platform === 'win32') { const pyDir = path.dirname(pythonPath) const pyRoot = path.dirname(path.dirname(pyDir)) env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter) } const r = spawnSync( pythonPath, [preprocessTemplateScriptPath, '--src', templateAbsPath, '--work-dir', workDir], { encoding: 'utf-8', timeout: PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS, env, cwd: projectRoot, windowsHide: true } ) if (r.status !== 0) { const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || '模板预处理失败' return { ok: false, error: msg } } try { const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || '' const j = JSON.parse(tail) if (!j.success) return { ok: false, error: j.error || '模板预处理失败' } return { ok: true, meta: j } } catch (e) { return { ok: false, error: '模板预处理输出非 JSON' } } } function waitUntilMatchedWritten (absPath, maxMs) { const matchedWaitMs = maxMs != null ? maxMs : (process.env.IMG_MATCH_MATCHED_WAIT_MS ? Math.max(5000, parseInt(process.env.IMG_MATCH_MATCHED_WAIT_MS, 10) || 30000) : 30000) if (!absPath) return true const t0 = Date.now() let lastSize = -1 let stableStart = 0 const STABLE_MS = 120 while (Date.now() - t0 < matchedWaitMs) { try { if (fs.existsSync(absPath)) { const st = fs.statSync(absPath) if (st.size >= 32) { if (st.size === lastSize) { if (Date.now() - stableStart >= STABLE_MS) return true } else { lastSize = st.size stableStart = Date.now() } } } } catch (_) {} const until = Date.now() + 35 while (Date.now() < until) {} } try { return fs.existsSync(absPath) && fs.statSync(absPath).size >= 32 } catch (_) { return false } } * ---------- 旧 getPythonPath / 预处理 / Matched 等待结束 ---------- */ function resolvePythonExecutable () { return getPythonExeFromConfig(config) } /** * 在 screenshot 上以绿色空心圆标出中心点,写入 workDir/screenshot_center_marked.png。 * 圆半径可由环境变量 IMG_CENTER_MARK_RADIUS(正整数像素)覆盖,否则由脚本按分辨率估算。 * @returns {{ ok: boolean, outPath?: string, error?: string }} */ function drawCenterMarkOnScreenshot (workDir, screenshotPath, centerX, centerY) { if (!fs.existsSync(markCenterScriptPath)) { return { ok: false, error: `未找到 ${markCenterScriptPath}` } } const outPath = path.join(workDir, 'screenshot_center_marked.png') const pythonPath = resolvePythonExecutable() const args = [ markCenterScriptPath, '--input', screenshotPath, '--output', outPath, '--x', String(Math.round(centerX)), '--y', String(Math.round(centerY)), ] const rEnv = parseInt(String(process.env.IMG_CENTER_MARK_RADIUS || '').trim(), 10) if (Number.isFinite(rEnv) && rEnv > 0) { args.push('--radius', String(rEnv)) } const env = { ...process.env, PYTHONIOENCODING: 'utf-8' } if (process.platform === 'win32') { const pyDir = path.dirname(pythonPath) const pyRoot = path.dirname(path.dirname(pyDir)) env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter) } const r = spawnSync(pythonPath, args, { encoding: 'utf-8', timeout: IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS, env, cwd: projectRoot, windowsHide: true, }) if (r.status !== 0) { const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || '绘制中心点标记失败' return { ok: false, error: msg } } try { const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || '' const j = JSON.parse(tail) if (!j.success) return { ok: false, error: j.error || '绘制中心点标记失败' } } catch (_) { if (!fs.existsSync(outPath) || fs.statSync(outPath).size < 32) { return { ok: false, error: '标记脚本输出非 JSON 或未写出有效 PNG' } } } return { ok: true, outPath } } async function runPipeline (device, templateAbsPath, folderPath) { if (!device) return { success: false, error: '缺少设备 ID' } if (!templateAbsPath || !fs.existsSync(templateAbsPath)) { return { success: false, error: `模板不存在: ${templateAbsPath}` } } const tmpRoot = resolveWorkflowTmpRoot(folderPath) fs.mkdirSync(tmpRoot, { recursive: true }) const workDir = path.join(tmpRoot, `img-center-${Date.now()}`) fs.mkdirSync(workDir, { recursive: true }) const screenshotPath = path.join(workDir, 'screenshot.png') const templateInWork = path.join(workDir, 'template.png') const adbPath = getAdbPath() if (!adbScreencapPng(adbPath, device, screenshotPath)) { return { success: false, error: 'ADB 截图失败' } } fs.copyFileSync(templateAbsPath, templateInWork) const dims = readPngIhdrDimensions(screenshotPath) if (!dims) { return { success: false, error: '无法读取截图 PNG 尺寸(IHDR)', workDir } } const modelChain = getCenterPointVlmModelChain() if (modelChain.length === 0) { const err = '未配置可用 VLM 模型' fs.writeFileSync(path.join(workDir, 'openai_error.txt'), err, 'utf8') return { success: false, error: err, workDir } } const attemptLog = [] let lastError = '' /** @type {{ center_rx: number, center_ry: number } | null} */ let norm = null let aiData = null let successRaw = null let successModel = null for (let i = 0; i < modelChain.length; i++) { const m = modelChain[i] const ai = await callOpenAiCenterPointWithModel(workDir, screenshotPath, templateInWork, m, i) const normTry = ai.ok ? parseCenterNormalizedFromVlm(ai.data, dims.width, dims.height) : null attemptLog.push({ index: i, model: m, requestOk: ai.ok, error: ai.ok ? null : ai.error, hasValidCenter: !!normTry, }) if (!ai.ok) { lastError = ai.error || 'VLM 中心点失败' continue } if (normTry) { norm = normTry aiData = ai.data successRaw = ai.rawResp successModel = m break } lastError = '模型未返回有效中心点(需 center_rx/center_ry 或兼容字段)' } try { fs.writeFileSync( path.join(workDir, 'vlm_center_model_attempts.json'), JSON.stringify( { model_chain: modelChain, success_model: successModel, attempts: attemptLog, }, null, 2 ), 'utf8' ) } catch (_) {} if (!norm || !aiData) { fs.writeFileSync(path.join(workDir, 'openai_error.txt'), String(lastError || 'unknown'), 'utf8') fs.writeFileSync(path.join(workDir, 'center_parse_error.txt'), String(lastError || 'unknown'), 'utf8') return { success: false, error: lastError || 'VLM 中心点失败', workDir } } try { if (successRaw) { fs.writeFileSync(path.join(workDir, 'openai_raw.json'), JSON.stringify(successRaw, null, 2), 'utf8') } fs.writeFileSync(path.join(workDir, 'vlm_center_parsed.json'), JSON.stringify(aiData, null, 2), 'utf8') } catch (_) {} const px = Math.round(norm.center_rx * dims.width) const py = Math.round(norm.center_ry * dims.height) const ix = Math.max(0, Math.min(dims.width - 1, px)) const iy = Math.max(0, Math.min(dims.height - 1, py)) let markedScreenshotPath = null const mark = drawCenterMarkOnScreenshot(workDir, screenshotPath, ix, iy) if (mark.ok) { markedScreenshotPath = mark.outPath || null } else { fs.writeFileSync( path.join(workDir, 'screenshot_center_mark_error.txt'), String(mark.error || 'unknown'), 'utf8' ) } fs.writeFileSync( path.join(workDir, 'vlm_center_result.json'), JSON.stringify( { center_rx: norm.center_rx, center_ry: norm.center_ry, pixel_x: ix, pixel_y: iy, screenshot_width: dims.width, screenshot_height: dims.height, marked_screenshot: markedScreenshotPath ? path.basename(markedScreenshotPath) : null, marked_screenshot_error: markedScreenshotPath ? null : (mark.error || '未生成标记图'), }, null, 2 ), 'utf8' ) return { success: true, coordinate: { x: ix, y: iy, width: 1, height: 1 }, clickPosition: { x: ix, y: iy }, workDir, markedScreenshotPath, } } /* * ---------- 旧 runPipeline(ROI + 预处理 + NCC,保留勿删)---------- async function runPipeline_OLD_NCC (device, templateAbsPath, folderPath) { if (!device) return { success: false, error: '缺少设备 ID' } if (!templateAbsPath || !fs.existsSync(templateAbsPath)) { return { success: false, error: `模板不存在: ${templateAbsPath}` } } if (!fs.existsSync(orbScriptPath)) { return { success: false, error: `未找到 ${orbScriptPath}` } } if (!fs.existsSync(preprocessTemplateScriptPath)) { return { success: false, error: `未找到 ${preprocessTemplateScriptPath}` } } const tmpRoot = resolveWorkflowTmpRoot(folderPath) fs.mkdirSync(tmpRoot, { recursive: true }) const workDir = path.join(tmpRoot, `img-center-${Date.now()}`) fs.mkdirSync(workDir, { recursive: true }) const screenshotPath = path.join(workDir, 'screenshot.png') const templateInWork = path.join(workDir, 'template.png') const matchedPath = path.join(workDir, 'Matched.png') const adbPath = getAdbPath() if (!adbScreencapPng(adbPath, device, screenshotPath)) { return { success: false, error: 'ADB 截图失败' } } fs.copyFileSync(templateAbsPath, templateInWork) const ai = await callOpenAiRoi(workDir, screenshotPath, templateInWork) if (!ai.ok) { fs.writeFileSync(path.join(workDir, 'openai_error.txt'), String(ai.error || 'unknown'), 'utf8') return { success: false, error: ai.error || 'VLM ROI 失败', workDir } } let payload = normalizeVlmPayload(ai.data) if (!isFullScreenRoiHint(payload.roi_hint)) { payload = { ...payload, roi_hint: expandRoiHintNormalized(payload.roi_hint), } } fs.writeFileSync(path.join(workDir, 'vlm_roi.json'), JSON.stringify(payload, null, 2), 'utf8') const pythonPath = getPythonPath() const prep = runTemplatePreprocess(pythonPath, templateAbsPath, workDir) if (!prep.ok) { fs.copyFileSync(templateAbsPath, templateInWork) try { const vrPath = path.join(workDir, 'vlm_roi.json') const vr = JSON.parse(fs.readFileSync(vrPath, 'utf8')) vr.template_vlm_preprocessed = false delete vr.template_preprocess_paths fs.writeFileSync(vrPath, JSON.stringify(vr, null, 2), 'utf8') } catch (_) {} fs.writeFileSync(path.join(workDir, 'template_preprocess_error.txt'), String(prep.error || ''), 'utf8') } const env = { ...process.env, PYTHONIOENCODING: 'utf-8' } if (!String(env.IMG_CENTER_NCC_MIN_SCORE || '').trim()) { env.IMG_CENTER_NCC_MIN_SCORE = String(IMG_CENTER_NCC_MIN_SCORE_DEFAULT) } if (process.platform === 'win32') { const pyDir = path.dirname(pythonPath) const pyRoot = path.dirname(path.dirname(pyDir)) env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter) } const r = spawnSync( pythonPath, [orbScriptPath, '--work-dir', workDir], { encoding: 'utf-8', timeout: PYTHON_ORB_SCRIPT_TIMEOUT_MS, env, cwd: projectRoot } ) if (r.status !== 0) { const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || 'NCC 匹配脚本失败' return { success: false, error: msg, workDir } } let out try { out = JSON.parse(r.stdout.trim()) } catch (e) { return { success: false, error: `脚本输出非 JSON: ${(r.stdout || '').slice(0, 300)}`, workDir } } if (!out.success) return { success: false, error: out.error || '匹配失败', workDir } if (!waitUntilMatchedWritten(matchedPath, MATCHED_PNG_MAX_WAIT_MS)) { return { success: false, error: `Matched.png 未就绪: ${matchedPath}`, workDir } } return { success: true, coordinate: { x: out.x, y: out.y, width: out.width, height: out.height }, clickPosition: { x: out.center_x, y: out.center_y }, workDir, } } * ---------- 旧 runPipeline 结束 ---------- */ /** * press/locate:Electron 侧 await;返回 { success, coordinate?, clickPosition?, error? } * @param {string} [folderPath] 当前流程目录(如 .../static/process/GenerateNote),临时文件写入其下 tmp/ */ async function matchImageAndGetCoordinate (device, imagePath, folderPath) { const templatePath = path.isAbsolute(imagePath) ? imagePath : path.resolve(projectRoot, imagePath) const r = await runPipeline(device, templatePath, folderPath) if (!r.success) return { success: false, error: r.error } return { success: true, coordinate: r.coordinate, clickPosition: r.clickPosition, markedScreenshotPath: r.markedScreenshotPath || null, } } async function executeImgCenterPointLocation ({ device, template, folderPath }) { if (!device) return { success: false, error: '缺少设备 ID,无法自动获取截图' } if (!template || typeof template !== 'string') return { success: false, error: '缺少模板图片路径(inVars[0])' } const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot const isAbsoluteOrDrive = template.startsWith('/') || template.includes(':') const hasSubPath = template.includes('/') || template.includes(path.sep) const templatePath = isAbsoluteOrDrive ? template : (hasSubPath ? path.join(baseDir, template) : path.join(baseDir, 'resources', template)) const result = await runPipeline(device, templatePath, folderPath) if (!result.success) return { success: false, error: result.error } const center = result.clickPosition || { x: result.coordinate.x + result.coordinate.width / 2, y: result.coordinate.y + result.coordinate.height / 2, } return { success: true, center, coordinate: result.coordinate, workDir: result.workDir, markedScreenshotPath: result.markedScreenshotPath || null, } } module.exports = { tagName, schema, executeImgCenterPointLocation, matchImageAndGetCoordinate, /** @deprecated 与 matchImageAndGetCoordinate 相同 */ matchImageAndGetCoordinateAsync: matchImageAndGetCoordinate, /** 解析后的模型名 + 当前 ai 包 baseUrl(测试 / 调试) */ getImgCenterModel, getImgCenterAiMeta, /** 中心点 VLM 实际调用链(主 + 备用,≤3) */ getCenterPointVlmModelChain, }