// img-center-point-location.js
  1. /**
  2. * fun 标签:img-center-point-location
  3. * 仅需 inVars[0]:模板路径。
  4. *
  5. * 【当前逻辑】tmp → ADB 截图 + 模板复制 → VLM img2text(主模型 + 最多 2 个备用模型,**共≤3 次**;
  6. * 仅在「请求失败」或「无有效中心点坐标」时换模型重试)→ 归一化转像素 →
  7. * 可选:Python 在截图上画**绿色圆圈**标出中心点,保存为 `screenshot_center_marked.png`(同一次 `img-center-时间戳` 目录)。
  8. *
  9. * 【已注释保留】原流程:VLM(ROI + template_crop / need_center_crop / template_scale)→ 外扩 roi →
  10. * Python 预处理 → NCC(img-center-orb-akaze.py)。恢复时取消下方对应块注释并改回 runPipeline 即可。
  11. *
  12. * 密钥与网关:优先根目录 config.js(openaiApiKey 等同步到环境变量),否则 nodejs/ai/config.js;模型常量见文件顶部。
  13. */
  14. // ---------------------------------------------------------------------------
  15. // 配置:模型与超时(优先改这里或对应环境变量 / 根目录 config.js)
  16. // ---------------------------------------------------------------------------
/**
 * Primary multimodal model id for the center-point img2text request.
 * A non-empty string here takes precedence; when it is '' the id is taken from
 * process.env.IMG_CENTER_OPENAI_MODEL, then config.imgCenterOpenAiModel, and
 * otherwise left to img2textRequest.resolveImgCenterModel.
 */
// const IMG_CENTER_OPENAI_MODEL = 'gemini-3.1-pro-preview'
const IMG_CENTER_OPENAI_MODEL = 'gpt-5.4'
/**
 * Fallback models for the center-point request. They are tried in order after
 * the primary model when a request fails or no valid center can be parsed.
 * The chain is [primary, fallback 1, fallback 2], de-duplicated and truncated
 * to IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS entries.
 * '' skips a slot. The environment variables IMG_CENTER_FALLBACK_MODEL_1 /
 * IMG_CENTER_FALLBACK_MODEL_2 take precedence over these constants.
 *
 * Author's selection notes (opinion carried over from the original comment,
 * not verified here): Sonnet-class models are cheaper/faster and usually good
 * enough for this short-JSON task, Opus-class models are slightly stronger,
 * and "-thinking" variants are slower and more expensive. Any model id the
 * configured OpenAI-compatible gateway accepts (e.g. gpt-4o / gpt-4.1) can be
 * used instead.
 */
const IMG_CENTER_FALLBACK_MODEL_1 = 'claude-opus-4-6'
const IMG_CENTER_FALLBACK_MODEL_2 = 'gemini-3.1-pro-preview'
/** Maximum number of center-point img2text calls (primary + fallbacks, after de-duplication). */
const IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS = 3
  38. /**
  39. * 视觉 API(img2text)请求超时(毫秒)
  40. * 环境变量 IMG_CENTER_AI_TIMEOUT_MS 可覆盖
  41. */
  42. const IMG_CENTER_AI_TIMEOUT_MS = Math.max(
  43. 10_000,
  44. parseInt(String(process.env.IMG_CENTER_AI_TIMEOUT_MS || '').trim(), 10) || 300_000
  45. )
  46. /** 在截图上绘制中心点绿圈的 Python 脚本超时(毫秒);环境变量 IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS */
  47. const IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS = Math.max(
  48. 5000,
  49. parseInt(String(process.env.IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS || '').trim(), 10) || 30_000
  50. )
  51. /** 【旧 NCC 流程】runPipeline 内等待 Matched.png 就绪的上限(毫秒) */
  52. const MATCHED_PNG_MAX_WAIT_MS = 60_000
  53. /** 【旧 NCC 流程】Python NCC 匹配脚本 spawn 超时(毫秒) */
  54. const PYTHON_ORB_SCRIPT_TIMEOUT_MS = 120_000
  55. /**
  56. * 【旧 NCC 流程】NCC 最低分:传给 Python(环境变量 IMG_CENTER_NCC_MIN_SCORE)。
  57. */
  58. const IMG_CENTER_NCC_MIN_SCORE_DEFAULT = 0.34
  59. /** 【旧预处理】模板预处理脚本 spawn 超时(毫秒);环境变量 IMG_CENTER_PREPROCESS_TIMEOUT_MS 可覆盖 */
  60. const PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS = Math.max(
  61. 5000,
  62. parseInt(String(process.env.IMG_CENTER_PREPROCESS_TIMEOUT_MS || '').trim(), 10) || 60_000
  63. )
  64. /**
  65. * 【旧 VLM ROI】外扩比例,环境变量 IMG_CENTER_ROI_PAD 可覆盖
  66. */
  67. const IMG_CENTER_ROI_PAD = Math.max(
  68. 0,
  69. Math.min(
  70. 0.12,
  71. parseFloat(String(process.env.IMG_CENTER_ROI_PAD || '').trim()) || 0.03
  72. )
  73. )
  74. /**
  75. * 【旧 VLM ROI】归一化高度下限;IMG_CENTER_ROI_MIN_REL_H,默认 0.15
  76. */
  77. const IMG_CENTER_ROI_MIN_REL_H = Math.max(
  78. 0.06,
  79. Math.min(
  80. 0.45,
  81. parseFloat(String(process.env.IMG_CENTER_ROI_MIN_REL_H || '').trim()) || 0.15
  82. )
  83. )
  84. /**
  85. * 【旧 VLM ROI】归一化宽度下限(0 表示不强制);IMG_CENTER_ROI_MIN_REL_W
  86. */
  87. const IMG_CENTER_ROI_MIN_REL_W = (() => {
  88. const v = parseFloat(String(process.env.IMG_CENTER_ROI_MIN_REL_W || '').trim())
  89. if (!Number.isFinite(v) || v <= 0) return 0
  90. return Math.max(0.06, Math.min(0.95, v))
  91. })()
  92. // ---------------------------------------------------------------------------
  93. // 依赖与工程路径
  94. // ---------------------------------------------------------------------------
  95. const path = require('path')
  96. const fs = require('fs')
  97. const { spawnSync } = require('child_process')
  98. const { getPythonExeFromConfig } = require('../../../../python-exe-from-config.js')
  99. const configPath = process.env.STATIC_ROOT
  100. ? path.join(path.dirname(path.resolve(process.env.STATIC_ROOT)), 'config.js')
  101. : path.join(__dirname, '..', '..', '..', '..', '..', 'config.js')
  102. const config = fs.existsSync(configPath) ? require(configPath) : {}
  103. const projectRoot = (config.projectRoot && fs.existsSync(config.projectRoot))
  104. ? config.projectRoot
  105. : path.dirname(path.resolve(configPath))
  106. /** 在加载 nodejs/ai 之前同步,使 ai/config 能读到应用级密钥与网关 */
  107. function syncProcessEnvFromAppConfig () {
  108. const k = config.openaiApiKey || config.vlmApiKey
  109. if (
  110. k &&
  111. !String(process.env.API_KEY || '').trim() &&
  112. !String(process.env.OPENAI_API_KEY || '').trim() &&
  113. !String(process.env.VLM_API_KEY || '').trim()
  114. ) {
  115. process.env.API_KEY = String(k).trim()
  116. }
  117. const u = config.openaiApiUrl
  118. if (
  119. u &&
  120. !String(process.env.BASE_URL || '').trim() &&
  121. !String(process.env.OPENAI_API_URL || '').trim()
  122. ) {
  123. process.env.BASE_URL = String(u).trim().replace(/\/$/, '')
  124. }
  125. }
  126. syncProcessEnvFromAppConfig()
  127. const aiRoot = path.join(__dirname, '..', '..', '..', '..', 'ai')
  128. const aiModule = require(path.join(aiRoot, 'ai.js'))
  129. const img2textRequest = require(path.join(aiRoot, 'request', 'img2text.js'))
  130. const aiPackageConfig = require(path.join(aiRoot, 'config.js'))
  131. /** 【旧 NCC / 预处理】脚本路径(当前 runPipeline 不调用;恢复旧流程时用) */
  132. const orbScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-orb-akaze.py')
  133. const preprocessTemplateScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-preprocess-template.py')
  134. /** 在截图上标出 VLM 中心点(绿圈)的可视化脚本 */
  135. const markCenterScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-mark-center.py')
/**
 * [Current flow] Prompt that asks only for the center point.
 * Image 1 is the screenshot and image 2 the template; the model is asked to
 * answer with a single JSON object holding center_rx / center_ry in [0,1]
 * (relative to image 1's width / height), or null for both when no match exists.
 * The prompt text is sent to the model verbatim and must not be edited casually.
 */
const VLM_CENTER_ONLY_PROMPT = `你收到两张图,顺序固定:
图1:Android 手机完整截图(与 adb screencap 一致),逻辑像素宽约 W、高约 H;坐标原点在左上角,x 向右、y 向下。
图2:模板图。请在图1 中找到与图2 视觉上对应的同一区域(同一图标、缩略图格子、按钮等)。
任务:给出图2 在图1 中**匹配可见区域的几何中心点**(该区域中心,不是图2 整张文件的画布中心)。
只输出一个 JSON 对象,必须包含:
- "center_rx"、"center_ry":数字,取值在 [0,1],分别为该中心点在图1 上相对宽度 W、高度 H 的归一化坐标(左缘=0,上缘=0)。
若图1 中完全无法对应图2,两个键均填 null。
禁止 markdown、禁止代码围栏、禁止 JSON 以外的任何文字。`
  147. /*
  148. * ---------- 旧 ROI + 模板几何 VLM 提示(NCC 流程用,保留勿删)----------
  149. const VLM_USER_PROMPT = `你收到两张图,顺序固定:
  150. 图1:Android 手机竖屏完整截图,逻辑像素宽 W、高 H。
  151. 图2:模板图——要在图1画面里定位的同一内容。常见情况:图2 是 PC/自动化侧通过 ADB 推到手机上的**原始文件**(与磁盘上打开的同一像素内容),**不是**相册 App 已处理后的内部版本。图1 里若出现该图,往往是**系统相册/图库**里的**网格缩略图**:会先对原图做**缩小**,且方格多为**近似正方形**,竖图/横图常被**居中裁成方图**再显示,与图2 原始长宽比可能完全不同。
  152. 任务:在图1中找到与图2视觉上对应的那一块区域(同一控件、同一相册格里的缩略图等),给出「搜索矩形」roi_hint;并在适用时给出 template_crop、template_scale、need_center_crop,使下游能**复现相册里那一格的「裁切 + 缩放」**,用于模板匹配。
  153. 硬性规则(roi_hint):
  154. - roi_hint 的四个数必须是相对图1 的归一化坐标:rx0,ry0 为矩形左上角,rx1,ry1 为右下角,均在 [0,1],且 rx0<rx1、ry0<ry1。
  155. - 【只框图2】图2 里**只有**某一图标/按钮时,roi 应主要覆盖图1里**与之对应的那一块**,不要为了「多装点内容」而把**竖直方向**上、与图2无关的相邻 Tab 图标(例如在图1里与目标**横向并排**的其它底栏图标)一起框进竖长条;底栏场景下各图标是**横排**的,roi 应是**偏横向的条带**包住目标及窄边距,而不是上下堆叠多个无关图标。
  156. - 【必须完整】图2 模板在图1里所对应的那一整块 UI(含圆角、描边、阴影等可见像素)必须**全部**落在 roi 矩形内部,**任何一边都不得裁切**到模板上的图形;若宁可 roi 明显大一点也要保证完整。
  157. - 【自检】若你意识到按当前四个数裁图会「切掉」图2 上任意可见部分(例如只框到红色按钮的下半截、圆角被切、+ 号缺一截),**必须**把 ry0 上移或 ry1 下移、或放宽 rx,直到不会裁切。
  158. - 【底部栏】目标在屏底导航栏时:roi 的纵向高度 (ry1−ry0) 建议至少为屏高的 **14%~22%**,且 ry0 要足够靠上,使整块圆形/圆角按钮(含完整外轮廓)都在框内;**禁止**高度小于屏高 **12%** 的扁条。横向 (rx1−rx0) 以刚好包住目标按钮宽度 + 左右各约 5%~15% 屏宽为宜。
  159. - 【可大一些】在已完整包含上述目标的前提下,roi 宁可略大勿小:各方向外扩边距避免贴边裁切;图2 近似正方形时,roi **勿**做成「竖远长于横」的窄竖条(除非图2 本身就是竖长条)。
  160. - 若图1中存在多处相似元素,选择与图2内容最一致、最可能是用户意图的一处;若完全无法对应,四个坐标全填 null。
  161. template_scale(模拟相册把图缩小进格子的比例):
  162. - 数字,范围建议 0.05~1.0,表示在**已按下方顺序处理完 template_crop 与 need_center_crop 之后**,对图2 再做**等比线性缩放**(宽、高同比例);1 表示不在此步缩小。
  163. - 估法:对比**图1 里目标格中的缩略图**与**图2 原始文件**——若屏上格子里的内容明显是「整图缩小后的局部/整体」,应给 **小于 1**(相册格常见约 **0.2~0.5**,视分辨率与格大小而定);若图1 里几乎 1:1 对应原图2 像素内容则接近 1。
  164. - **不要**把 template_scale 理解成随意数字;应绑定:**对齐「ADB 原图 → 相册网格里显示尺寸」的缩放**。
  165. template_crop(相对**原始图2** 宽高的归一化矩形 cx0,cy0,cx1,cy1 ∈ [0,1],cx0<cx1、cy0<cy1):
  166. - **相册/多宫格场景**:表示「图1 里那一格缩略图**所对应的原图2 上的可见区域**」——即:若把原图2 按相册逻辑裁切后才会得到与格内一致的画面,应用此矩形框出原图2 上的该区域。竖长图在方格里通常只显示**中间一条/一块**,此时 crop 应是**接近正方形或略竖/略横的矩形**,**不要**默认填 0,0,1,1 除非图1 明确显示的是「整图缩进格内、无裁切」。
  167. - **非相册场景**(图标、按钮、整段 UI):可表示去掉白边、只保留主体;无需裁切则 0,0,1,1。
  168. - 若你能较准给出「格内所见 ↔ 原图2」的对应关系,应优先给出**非全图**的 template_crop;与 need_center_crop 配合见下。
  169. need_center_crop:
  170. - 当图1 明显是「相册/多宫格选图」、缩略图为**方格**且图2 与格内显示的长宽比不一致(典型:竖图进方格)时为 **true**;否则 **false**。
  171. - 为 true 时:roi_hint 仍只框**与图2 对应的那一格**(含格线外极少边距)。下游会在 template_crop 结果上再作**居中取最大正方形**,以逼近系统相册方格裁切;因此若你已在 template_crop 里给出了**精确的方格可见区域**(本身已接近正方形),可将 need_center_crop 设为 **false**,避免几何重复。
  172. - 若相册场景下**无法**可靠估计 template_crop,可 **template_crop 填 0,0,1,1** 且 **need_center_crop 为 true**,由程序用「整图居中裁方」兜底。
  173. confidence:0~1,可随意填;下游**不使用**该字段做拒识,仅作记录。
  174. 只输出一个 JSON 对象,顶层键必须包含:roi_hint、need_center_crop、confidence、template_scale、template_crop。
  175. roi_hint 为对象,键 "rx0","ry0","rx1","ry1"(数字或 null)。
  176. template_crop 为对象,键 "cx0","cy0","cx1","cy1"(数字)。
  177. 禁止 markdown、禁止代码围栏、禁止 JSON 以外的任何文字。`
  178. * ---------- 旧 VLM_USER_PROMPT 结束 ----------
  179. */
// Tag name of this workflow function (matches the "fun" tag named in the file header).
const tagName = 'img-center-point-location'
// Descriptor of the tag: human-readable description plus its input and output slots.
// NOTE(review): how tagName/schema are consumed is outside this excerpt — presumably exported to a tag registry.
const schema = {
  description:
    '在屏幕截图中查找模板并返回中心点;主模型无效坐标时自动换备用模型(最多 3 次)。成功时保存 screenshot_center_marked.png。原 ROI+预处理+NCC 已注释保留。',
  inputs: { template: '模板图片路径(inVars[0])', variable: '输出变量名(outVars)' },
  outputs: { variable: '中心点 {x,y}(对象)' },
}
  187. // ---------------------------------------------------------------------------
  188. // 以下为 runPipeline 主流程中的调用顺序(自上而下与执行顺序一致)
  189. // ---------------------------------------------------------------------------
  190. /** 流程目录下的 tmp,例如 static/process/GenerateNote/tmp */
  191. function resolveWorkflowTmpRoot (folderPath) {
  192. if (folderPath && typeof folderPath === 'string') {
  193. const fp = path.isAbsolute(folderPath) ? folderPath : path.join(projectRoot, folderPath)
  194. return path.join(fp, 'tmp')
  195. }
  196. return path.join(projectRoot, 'tmp')
  197. }
  198. function getAdbPath () {
  199. return config.adbPath?.path
  200. ? (path.isAbsolute(config.adbPath.path) ? config.adbPath.path : path.resolve(projectRoot, config.adbPath.path))
  201. : path.join(projectRoot, 'lib', 'scrcpy-adb', process.platform === 'win32' ? 'adb.exe' : 'adb')
  202. }
  203. function adbScreencapPng (adbPath, device, outFile) {
  204. const r = spawnSync(adbPath, ['-s', device, 'exec-out', 'screencap', '-p'], {
  205. encoding: 'buffer',
  206. maxBuffer: 40 * 1024 * 1024,
  207. windowsHide: true,
  208. })
  209. if (r.status !== 0 || !r.stdout || r.stdout.length < 100) return false
  210. fs.mkdirSync(path.dirname(outFile), { recursive: true })
  211. fs.writeFileSync(outFile, r.stdout)
  212. return true
  213. }
  214. function fileToDataUrlPng (absPath) {
  215. const buf = fs.readFileSync(absPath)
  216. const b64 = buf.toString('base64')
  217. return `data:image/png;base64,${b64}`
  218. }
  219. /** 从模型返回文本中抽出 JSON 对象 */
  220. function parseVlmJson (text) {
  221. let s = String(text || '').trim()
  222. const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/i)
  223. if (fence) s = fence[1].trim()
  224. const m = s.match(/\{[\s\S]*\}/)
  225. if (!m) return null
  226. try {
  227. return JSON.parse(m[0])
  228. } catch (_) {
  229. return null
  230. }
  231. }
  232. /** 读取 PNG IHDR 宽高(adb screencap -p 为 PNG) */
  233. function readPngIhdrDimensions (absPath) {
  234. try {
  235. const fd = fs.openSync(absPath, 'r')
  236. const buf = Buffer.allocUnsafe(24)
  237. fs.readSync(fd, buf, 0, 24, 0)
  238. fs.closeSync(fd)
  239. if (buf.length < 24 || buf[0] !== 0x89) return null
  240. const w = buf.readUInt32BE(16)
  241. const h = buf.readUInt32BE(20)
  242. if (!Number.isFinite(w) || !Number.isFinite(h) || w < 1 || h < 1) return null
  243. return { width: w, height: h }
  244. } catch (_) {
  245. return null
  246. }
  247. }
  248. function clamp01 (x) {
  249. if (x === null || x === undefined) return null
  250. const n = Number(x)
  251. if (!Number.isFinite(n)) return null
  252. return Math.max(0, Math.min(1, n))
  253. }
  254. /**
  255. * 从 VLM JSON 解析归一化中心;优先 center_rx/center_ry,其次 cx/cy;
  256. * 若仅有 center_x/center_y:大于 1 时按像素除以 width/height,否则按归一化。
  257. */
  258. function parseCenterNormalizedFromVlm (parsed, width, height) {
  259. if (!parsed || typeof parsed !== 'object') return null
  260. let crx = clamp01(parsed.center_rx)
  261. let cry = clamp01(parsed.center_ry)
  262. if (crx != null && cry != null) return { center_rx: crx, center_ry: cry }
  263. crx = clamp01(parsed.cx)
  264. cry = clamp01(parsed.cy)
  265. if (crx != null && cry != null) return { center_rx: crx, center_ry: cry }
  266. const px = Number(parsed.center_x)
  267. const py = Number(parsed.center_y)
  268. if (!Number.isFinite(px) || !Number.isFinite(py)) return null
  269. if (width > 0 && height > 0 && (px > 1 || py > 1)) {
  270. crx = clamp01(px / width)
  271. cry = clamp01(py / height)
  272. if (crx != null && cry != null) return { center_rx: crx, center_ry: cry }
  273. }
  274. crx = clamp01(px)
  275. cry = clamp01(py)
  276. if (crx != null && cry != null) return { center_rx: crx, center_ry: cry }
  277. return null
  278. }
  279. /** 实际发往 API 的模型名:见顶部 IMG_CENTER_OPENAI_MODEL → env / config → nodejs/ai。 */
  280. function getImgCenterModel () {
  281. const explicit =
  282. String(IMG_CENTER_OPENAI_MODEL || '').trim() ||
  283. String(process.env.IMG_CENTER_OPENAI_MODEL || '').trim() ||
  284. (config.imgCenterOpenAiModel && String(config.imgCenterOpenAiModel).trim()) ||
  285. ''
  286. return img2textRequest.resolveImgCenterModel(explicit || undefined)
  287. }
  288. /** 第一次仅用 OpenAI 兼容网关;若配置成 doubao 则改用 nodejs/ai 的 IMG_CENTER_MODEL(避免首轮即豆包) */
  289. function getPrimaryOpenAiImgCenterModel () {
  290. const m = getImgCenterModel()
  291. if (m && String(m).toLowerCase() === 'doubao') {
  292. const fb = String(aiPackageConfig.IMG_CENTER_MODEL || '').trim()
  293. if (fb && fb.toLowerCase() !== 'doubao') return fb
  294. return 'gpt-5.4'
  295. }
  296. return m
  297. }
  298. function resolveFallbackCenterModelId (envKey, constVal) {
  299. const e = String(process.env[envKey] || '').trim()
  300. if (e) return e
  301. return String(constVal != null ? constVal : '').trim()
  302. }
  303. /**
  304. * 中心点 img2text 模型链:[主模型, 备用1, 备用2] 去重后取前 IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS 个。
  305. */
  306. function getCenterPointVlmModelChain () {
  307. const primary = String(getPrimaryOpenAiImgCenterModel() || '').trim()
  308. const fb1 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1)
  309. const fb2 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2)
  310. const chain = []
  311. const push = (id) => {
  312. const x = String(id || '').trim()
  313. if (!x) return
  314. if (!chain.includes(x)) chain.push(x)
  315. }
  316. push(primary)
  317. push(fb1)
  318. push(fb2)
  319. return chain.slice(0, IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS)
  320. }
  321. function getImgCenterAiMeta () {
  322. return {
  323. model: getImgCenterModel(),
  324. primaryOpenAiModel: getPrimaryOpenAiImgCenterModel(),
  325. centerPointVlmModelChain: getCenterPointVlmModelChain(),
  326. fallbackModel1: resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1),
  327. fallbackModel2: resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2),
  328. baseUrl: aiPackageConfig.BASE_URL,
  329. openAiKeyConfigured: !!(aiPackageConfig.API_KEY && String(aiPackageConfig.API_KEY).trim()),
  330. }
  331. }
  332. /**
  333. * 【当前】单次 img2text(指定 model);每次尝试写入 openai_raw_attempt_{i}.json。
  334. * @returns {{ ok: boolean, data?: object, error?: string, model: string, rawResp?: object }}
  335. */
  336. async function callOpenAiCenterPointWithModel (workDir, screenshotPath, templatePath, modelName, attemptIndex) {
  337. syncProcessEnvFromAppConfig()
  338. const openAiKey = String(aiPackageConfig.API_KEY || '').trim()
  339. const model = String(modelName || '').trim()
  340. const screenUrl = fileToDataUrlPng(screenshotPath)
  341. const tplUrl = fileToDataUrlPng(templatePath)
  342. const imageUrls = [screenUrl, tplUrl]
  343. try {
  344. if (!openAiKey) {
  345. return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', model }
  346. }
  347. if (!model) {
  348. return { ok: false, error: '模型 id 为空', model }
  349. }
  350. const result = await aiModule.run('img2text', VLM_CENTER_ONLY_PROMPT, imageUrls, {
  351. timeoutMs: IMG_CENTER_AI_TIMEOUT_MS,
  352. model,
  353. })
  354. const resp = result.data
  355. const attemptPayload = {
  356. model,
  357. attemptIndex,
  358. httpSuccess: result.success,
  359. httpError: result.success ? null : (result.error || null),
  360. response: resp,
  361. }
  362. fs.writeFileSync(
  363. path.join(workDir, `openai_raw_attempt_${attemptIndex}.json`),
  364. JSON.stringify(attemptPayload, null, 2),
  365. 'utf8'
  366. )
  367. if (!result.success) {
  368. return { ok: false, error: result.error || 'VLM 请求失败', model }
  369. }
  370. const content = resp?.choices?.[0]?.message?.content
  371. const parsed = parseVlmJson(content)
  372. if (!parsed || typeof parsed !== 'object') {
  373. return { ok: false, error: '无法解析模型返回为 JSON', model, rawResp: resp }
  374. }
  375. return { ok: true, data: parsed, model, rawResp: resp }
  376. } catch (e) {
  377. const msg = e && e.message ? e.message : String(e)
  378. try {
  379. fs.writeFileSync(path.join(workDir, `openai_error_attempt_${attemptIndex}.txt`), msg, 'utf8')
  380. } catch (_) {}
  381. return { ok: false, error: msg, model }
  382. }
  383. }
  384. /*
  385. * ---------- 旧 callOpenAiRoi(ROI + 模板几何,NCC 流程用,保留勿删)----------
  386. async function callOpenAiRoi (workDir, screenshotPath, templatePath) {
  387. syncProcessEnvFromAppConfig()
  388. const openAiKey = String(aiPackageConfig.API_KEY || '').trim()
  389. const screenUrl = fileToDataUrlPng(screenshotPath)
  390. const tplUrl = fileToDataUrlPng(templatePath)
  391. const imageUrls = [screenUrl, tplUrl]
  392. const emptyFallback = {
  393. roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 },
  394. need_center_crop: false,
  395. confidence: 0,
  396. template_scale: 1,
  397. template_crop: { cx0: 0, cy0: 0, cx1: 1, cy1: 1 },
  398. template_vlm_preprocessed: false,
  399. }
  400. try {
  401. if (!openAiKey) {
  402. return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', fallback: emptyFallback }
  403. }
  404. const openAiModel = getPrimaryOpenAiImgCenterModel()
  405. const result = await aiModule.run('img2text', VLM_USER_PROMPT, imageUrls, {
  406. timeoutMs: IMG_CENTER_AI_TIMEOUT_MS,
  407. model: openAiModel,
  408. })
  409. if (!result.success) {
  410. return { ok: false, error: result.error || 'VLM 请求失败', fallback: emptyFallback }
  411. }
  412. const resp = result.data
  413. const content = resp?.choices?.[0]?.message?.content
  414. const parsed = parseVlmJson(content)
  415. if (!parsed || typeof parsed !== 'object') {
  416. return { ok: false, error: '无法解析模型返回为 JSON', fallback: emptyFallback }
  417. }
  418. fs.writeFileSync(path.join(workDir, 'openai_raw.json'), JSON.stringify(resp, null, 2), 'utf8')
  419. return { ok: true, data: parsed }
  420. } catch (e) {
  421. const msg = e && e.message ? e.message : String(e)
  422. return { ok: false, error: msg, fallback: emptyFallback }
  423. }
  424. }
  425. * ---------- 旧 callOpenAiRoi 结束 ----------
  426. */
  427. /*
  428. * ---------- 旧 normalizeTemplateGeometry / normalizeVlmPayload / expandRoiHintNormalized / isFullScreenRoiHint(保留勿删)----------
  429. function normalizeTemplateGeometry (obj) {
  430. const c = obj && obj.template_crop
  431. let cx0 = 0
  432. let cy0 = 0
  433. let cx1 = 1
  434. let cy1 = 1
  435. if (c && typeof c === 'object') {
  436. const n = (k, d) => {
  437. const v = Number(c[k])
  438. return Number.isFinite(v) ? Math.max(0, Math.min(1, v)) : d
  439. }
  440. cx0 = n('cx0', 0)
  441. cy0 = n('cy0', 0)
  442. cx1 = n('cx1', 1)
  443. cy1 = n('cy1', 1)
  444. if (cx1 <= cx0) {
  445. cx0 = 0
  446. cx1 = 1
  447. }
  448. if (cy1 <= cy0) {
  449. cy0 = 0
  450. cy1 = 1
  451. }
  452. }
  453. let sc = Number(obj && obj.template_scale)
  454. if (!Number.isFinite(sc) || sc <= 0) sc = 1
  455. sc = Math.max(0.05, Math.min(1, sc))
  456. return {
  457. template_crop: { cx0, cy0, cx1, cy1 },
  458. template_scale: sc,
  459. }
  460. }
  461. function normalizeVlmPayload (obj) {
  462. const rh = obj.roi_hint || {}
  463. const nums = ['rx0', 'ry0', 'rx1', 'ry1']
  464. let bad = false
  465. for (const k of nums) {
  466. const v = rh[k]
  467. if (v === null || v === undefined) bad = true
  468. }
  469. const tg = normalizeTemplateGeometry(obj)
  470. if (bad) {
  471. return {
  472. roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 },
  473. need_center_crop: false,
  474. confidence: 0,
  475. template_scale: tg.template_scale,
  476. template_crop: tg.template_crop,
  477. template_vlm_preprocessed: false,
  478. }
  479. }
  480. const c = Number(obj.confidence)
  481. const conf = Number.isFinite(c) ? Math.max(0, Math.min(1, c)) : 0
  482. return {
  483. roi_hint: {
  484. rx0: Number(rh.rx0),
  485. ry0: Number(rh.ry0),
  486. rx1: Number(rh.rx1),
  487. ry1: Number(rh.ry1),
  488. },
  489. need_center_crop: !!obj.need_center_crop,
  490. confidence: conf,
  491. template_scale: tg.template_scale,
  492. template_crop: tg.template_crop,
  493. template_vlm_preprocessed: false,
  494. }
  495. }
  496. function expandRoiHintNormalized (rh) {
  497. let rx0 = Number(rh.rx0)
  498. let ry0 = Number(rh.ry0)
  499. let rx1 = Number(rh.rx1)
  500. let ry1 = Number(rh.ry1)
  501. if (!(rx1 > rx0 && ry1 > ry0)) return rh
  502. const pad = IMG_CENTER_ROI_PAD
  503. rx0 = Math.max(0, rx0 - pad)
  504. ry0 = Math.max(0, ry0 - pad)
  505. rx1 = Math.min(1, rx1 + pad)
  506. ry1 = Math.min(1, ry1 + pad)
  507. let h = ry1 - ry0
  508. const hMin = IMG_CENTER_ROI_MIN_REL_H
  509. if (h < hMin) {
  510. const deficit = hMin - h
  511. const bottomAnchored = ry1 >= 0.88
  512. if (bottomAnchored) {
  513. ry0 = Math.max(0, ry0 - deficit)
  514. h = ry1 - ry0
  515. if (h < hMin) {
  516. ry1 = Math.min(1, ry0 + hMin)
  517. h = ry1 - ry0
  518. if (h < hMin) ry0 = Math.max(0, ry1 - hMin)
  519. }
  520. } else {
  521. const cy = (ry0 + ry1) / 2
  522. ry0 = Math.max(0, cy - hMin / 2)
  523. ry1 = Math.min(1, ry0 + hMin)
  524. if (ry1 >= 1 - 1e-9) ry0 = Math.max(0, 1 - hMin)
  525. }
  526. }
  527. const wMin = IMG_CENTER_ROI_MIN_REL_W
  528. if (wMin > 0) {
  529. let w = rx1 - rx0
  530. if (w < wMin) {
  531. const cx = (rx0 + rx1) / 2
  532. rx0 = Math.max(0, cx - wMin / 2)
  533. rx1 = Math.min(1, rx0 + wMin)
  534. if (rx1 >= 1 - 1e-9) rx0 = Math.max(0, 1 - wMin)
  535. }
  536. }
  537. return { rx0, ry0, rx1, ry1 }
  538. }
  539. function isFullScreenRoiHint (rh) {
  540. return (
  541. Math.abs(Number(rh.rx0)) < 1e-9 &&
  542. Math.abs(Number(rh.ry0)) < 1e-9 &&
  543. Math.abs(Number(rh.rx1) - 1) < 1e-9 &&
  544. Math.abs(Number(rh.ry1) - 1) < 1e-9
  545. )
  546. }
  547. * ---------- 旧几何归一化结束 ----------
  548. */
  549. /*
  550. * ---------- 旧 getPythonPath / runTemplatePreprocess / waitUntilMatchedWritten(保留勿删)----------
  551. function getPythonPath () {
  552. const base = config.pythonPath?.path || config.pythonVenvPath || path.join(projectRoot, 'python', process.arch === 'arm64' ? 'arm64' : 'x64')
  553. const envPy = path.join(base, 'env', 'Scripts', 'python.exe')
  554. const scriptsPy = path.join(base, 'Scripts', 'python.exe')
  555. const pyEmbedded = path.join(base, 'py', 'python.exe')
  556. if (fs.existsSync(envPy)) return envPy
  557. if (fs.existsSync(scriptsPy)) return scriptsPy
  558. if (fs.existsSync(pyEmbedded)) return pyEmbedded
  559. return 'python'
  560. }
  561. function runTemplatePreprocess (pythonPath, templateAbsPath, workDir) {
  562. if (!fs.existsSync(preprocessTemplateScriptPath)) {
  563. return { ok: false, error: `未找到 ${preprocessTemplateScriptPath}` }
  564. }
  565. const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
  566. if (process.platform === 'win32') {
  567. const pyDir = path.dirname(pythonPath)
  568. const pyRoot = path.dirname(path.dirname(pyDir))
  569. env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
  570. }
  571. const r = spawnSync(
  572. pythonPath,
  573. [preprocessTemplateScriptPath, '--src', templateAbsPath, '--work-dir', workDir],
  574. { encoding: 'utf-8', timeout: PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS, env, cwd: projectRoot, windowsHide: true }
  575. )
  576. if (r.status !== 0) {
  577. const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || '模板预处理失败'
  578. return { ok: false, error: msg }
  579. }
  580. try {
  581. const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || ''
  582. const j = JSON.parse(tail)
  583. if (!j.success) return { ok: false, error: j.error || '模板预处理失败' }
  584. return { ok: true, meta: j }
  585. } catch (e) {
  586. return { ok: false, error: '模板预处理输出非 JSON' }
  587. }
  588. }
  589. function waitUntilMatchedWritten (absPath, maxMs) {
  590. const matchedWaitMs =
  591. maxMs != null
  592. ? maxMs
  593. : (process.env.IMG_MATCH_MATCHED_WAIT_MS
  594. ? Math.max(5000, parseInt(process.env.IMG_MATCH_MATCHED_WAIT_MS, 10) || 30000)
  595. : 30000)
  596. if (!absPath) return true
  597. const t0 = Date.now()
  598. let lastSize = -1
  599. let stableStart = 0
  600. const STABLE_MS = 120
  601. while (Date.now() - t0 < matchedWaitMs) {
  602. try {
  603. if (fs.existsSync(absPath)) {
  604. const st = fs.statSync(absPath)
  605. if (st.size >= 32) {
  606. if (st.size === lastSize) {
  607. if (Date.now() - stableStart >= STABLE_MS) return true
  608. } else {
  609. lastSize = st.size
  610. stableStart = Date.now()
  611. }
  612. }
  613. }
  614. } catch (_) {}
  615. const until = Date.now() + 35
  616. while (Date.now() < until) {}
  617. }
  618. try {
  619. return fs.existsSync(absPath) && fs.statSync(absPath).size >= 32
  620. } catch (_) {
  621. return false
  622. }
  623. }
  624. * ---------- 旧 getPythonPath / 预处理 / Matched 等待结束 ----------
  625. */
  626. function resolvePythonExecutable () {
  627. return getPythonExeFromConfig(config)
  628. }
  629. /**
  630. * 在 screenshot 上以绿色空心圆标出中心点,写入 workDir/screenshot_center_marked.png。
  631. * 圆半径可由环境变量 IMG_CENTER_MARK_RADIUS(正整数像素)覆盖,否则由脚本按分辨率估算。
  632. * @returns {{ ok: boolean, outPath?: string, error?: string }}
  633. */
  634. function drawCenterMarkOnScreenshot (workDir, screenshotPath, centerX, centerY) {
  635. if (!fs.existsSync(markCenterScriptPath)) {
  636. return { ok: false, error: `未找到 ${markCenterScriptPath}` }
  637. }
  638. const outPath = path.join(workDir, 'screenshot_center_marked.png')
  639. const pythonPath = resolvePythonExecutable()
  640. const args = [
  641. markCenterScriptPath,
  642. '--input', screenshotPath,
  643. '--output', outPath,
  644. '--x', String(Math.round(centerX)),
  645. '--y', String(Math.round(centerY)),
  646. ]
  647. const rEnv = parseInt(String(process.env.IMG_CENTER_MARK_RADIUS || '').trim(), 10)
  648. if (Number.isFinite(rEnv) && rEnv > 0) {
  649. args.push('--radius', String(rEnv))
  650. }
  651. const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
  652. if (process.platform === 'win32') {
  653. const pyDir = path.dirname(pythonPath)
  654. const pyRoot = path.dirname(path.dirname(pyDir))
  655. env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
  656. }
  657. const r = spawnSync(pythonPath, args, {
  658. encoding: 'utf-8',
  659. timeout: IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS,
  660. env,
  661. cwd: projectRoot,
  662. windowsHide: true,
  663. })
  664. if (r.status !== 0) {
  665. const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || '绘制中心点标记失败'
  666. return { ok: false, error: msg }
  667. }
  668. try {
  669. const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || ''
  670. const j = JSON.parse(tail)
  671. if (!j.success) return { ok: false, error: j.error || '绘制中心点标记失败' }
  672. } catch (_) {
  673. if (!fs.existsSync(outPath) || fs.statSync(outPath).size < 32) {
  674. return { ok: false, error: '标记脚本输出非 JSON 或未写出有效 PNG' }
  675. }
  676. }
  677. return { ok: true, outPath }
  678. }
  679. async function runPipeline (device, templateAbsPath, folderPath) {
  680. if (!device) return { success: false, error: '缺少设备 ID' }
  681. if (!templateAbsPath || !fs.existsSync(templateAbsPath)) {
  682. return { success: false, error: `模板不存在: ${templateAbsPath}` }
  683. }
  684. const tmpRoot = resolveWorkflowTmpRoot(folderPath)
  685. fs.mkdirSync(tmpRoot, { recursive: true })
  686. const workDir = path.join(tmpRoot, `img-center-${Date.now()}`)
  687. fs.mkdirSync(workDir, { recursive: true })
  688. const screenshotPath = path.join(workDir, 'screenshot.png')
  689. const templateInWork = path.join(workDir, 'template.png')
  690. const adbPath = getAdbPath()
  691. if (!adbScreencapPng(adbPath, device, screenshotPath)) {
  692. return { success: false, error: 'ADB 截图失败' }
  693. }
  694. fs.copyFileSync(templateAbsPath, templateInWork)
  695. const dims = readPngIhdrDimensions(screenshotPath)
  696. if (!dims) {
  697. return { success: false, error: '无法读取截图 PNG 尺寸(IHDR)', workDir }
  698. }
  699. const modelChain = getCenterPointVlmModelChain()
  700. if (modelChain.length === 0) {
  701. const err = '未配置可用 VLM 模型'
  702. fs.writeFileSync(path.join(workDir, 'openai_error.txt'), err, 'utf8')
  703. return { success: false, error: err, workDir }
  704. }
  705. const attemptLog = []
  706. let lastError = ''
  707. /** @type {{ center_rx: number, center_ry: number } | null} */
  708. let norm = null
  709. let aiData = null
  710. let successRaw = null
  711. let successModel = null
  712. for (let i = 0; i < modelChain.length; i++) {
  713. const m = modelChain[i]
  714. const ai = await callOpenAiCenterPointWithModel(workDir, screenshotPath, templateInWork, m, i)
  715. const normTry = ai.ok ? parseCenterNormalizedFromVlm(ai.data, dims.width, dims.height) : null
  716. attemptLog.push({
  717. index: i,
  718. model: m,
  719. requestOk: ai.ok,
  720. error: ai.ok ? null : ai.error,
  721. hasValidCenter: !!normTry,
  722. })
  723. if (!ai.ok) {
  724. lastError = ai.error || 'VLM 中心点失败'
  725. continue
  726. }
  727. if (normTry) {
  728. norm = normTry
  729. aiData = ai.data
  730. successRaw = ai.rawResp
  731. successModel = m
  732. break
  733. }
  734. lastError = '模型未返回有效中心点(需 center_rx/center_ry 或兼容字段)'
  735. }
  736. try {
  737. fs.writeFileSync(
  738. path.join(workDir, 'vlm_center_model_attempts.json'),
  739. JSON.stringify(
  740. {
  741. model_chain: modelChain,
  742. success_model: successModel,
  743. attempts: attemptLog,
  744. },
  745. null,
  746. 2
  747. ),
  748. 'utf8'
  749. )
  750. } catch (_) {}
  751. if (!norm || !aiData) {
  752. fs.writeFileSync(path.join(workDir, 'openai_error.txt'), String(lastError || 'unknown'), 'utf8')
  753. fs.writeFileSync(path.join(workDir, 'center_parse_error.txt'), String(lastError || 'unknown'), 'utf8')
  754. return { success: false, error: lastError || 'VLM 中心点失败', workDir }
  755. }
  756. try {
  757. if (successRaw) {
  758. fs.writeFileSync(path.join(workDir, 'openai_raw.json'), JSON.stringify(successRaw, null, 2), 'utf8')
  759. }
  760. fs.writeFileSync(path.join(workDir, 'vlm_center_parsed.json'), JSON.stringify(aiData, null, 2), 'utf8')
  761. } catch (_) {}
  762. const px = Math.round(norm.center_rx * dims.width)
  763. const py = Math.round(norm.center_ry * dims.height)
  764. const ix = Math.max(0, Math.min(dims.width - 1, px))
  765. const iy = Math.max(0, Math.min(dims.height - 1, py))
  766. let markedScreenshotPath = null
  767. const mark = drawCenterMarkOnScreenshot(workDir, screenshotPath, ix, iy)
  768. if (mark.ok) {
  769. markedScreenshotPath = mark.outPath || null
  770. } else {
  771. fs.writeFileSync(
  772. path.join(workDir, 'screenshot_center_mark_error.txt'),
  773. String(mark.error || 'unknown'),
  774. 'utf8'
  775. )
  776. }
  777. fs.writeFileSync(
  778. path.join(workDir, 'vlm_center_result.json'),
  779. JSON.stringify(
  780. {
  781. center_rx: norm.center_rx,
  782. center_ry: norm.center_ry,
  783. pixel_x: ix,
  784. pixel_y: iy,
  785. screenshot_width: dims.width,
  786. screenshot_height: dims.height,
  787. marked_screenshot: markedScreenshotPath
  788. ? path.basename(markedScreenshotPath)
  789. : null,
  790. marked_screenshot_error: markedScreenshotPath ? null : (mark.error || '未生成标记图'),
  791. },
  792. null,
  793. 2
  794. ),
  795. 'utf8'
  796. )
  797. return {
  798. success: true,
  799. coordinate: { x: ix, y: iy, width: 1, height: 1 },
  800. clickPosition: { x: ix, y: iy },
  801. workDir,
  802. markedScreenshotPath,
  803. }
  804. }
  805. /*
  806. * ---------- 旧 runPipeline(ROI + 预处理 + NCC,保留勿删)----------
  807. async function runPipeline_OLD_NCC (device, templateAbsPath, folderPath) {
  808. if (!device) return { success: false, error: '缺少设备 ID' }
  809. if (!templateAbsPath || !fs.existsSync(templateAbsPath)) {
  810. return { success: false, error: `模板不存在: ${templateAbsPath}` }
  811. }
  812. if (!fs.existsSync(orbScriptPath)) {
  813. return { success: false, error: `未找到 ${orbScriptPath}` }
  814. }
  815. if (!fs.existsSync(preprocessTemplateScriptPath)) {
  816. return { success: false, error: `未找到 ${preprocessTemplateScriptPath}` }
  817. }
  818. const tmpRoot = resolveWorkflowTmpRoot(folderPath)
  819. fs.mkdirSync(tmpRoot, { recursive: true })
  820. const workDir = path.join(tmpRoot, `img-center-${Date.now()}`)
  821. fs.mkdirSync(workDir, { recursive: true })
  822. const screenshotPath = path.join(workDir, 'screenshot.png')
  823. const templateInWork = path.join(workDir, 'template.png')
  824. const matchedPath = path.join(workDir, 'Matched.png')
  825. const adbPath = getAdbPath()
  826. if (!adbScreencapPng(adbPath, device, screenshotPath)) {
  827. return { success: false, error: 'ADB 截图失败' }
  828. }
  829. fs.copyFileSync(templateAbsPath, templateInWork)
  830. const ai = await callOpenAiRoi(workDir, screenshotPath, templateInWork)
  831. if (!ai.ok) {
  832. fs.writeFileSync(path.join(workDir, 'openai_error.txt'), String(ai.error || 'unknown'), 'utf8')
  833. return { success: false, error: ai.error || 'VLM ROI 失败', workDir }
  834. }
  835. let payload = normalizeVlmPayload(ai.data)
  836. if (!isFullScreenRoiHint(payload.roi_hint)) {
  837. payload = {
  838. ...payload,
  839. roi_hint: expandRoiHintNormalized(payload.roi_hint),
  840. }
  841. }
  842. fs.writeFileSync(path.join(workDir, 'vlm_roi.json'), JSON.stringify(payload, null, 2), 'utf8')
  843. const pythonPath = getPythonPath()
  844. const prep = runTemplatePreprocess(pythonPath, templateAbsPath, workDir)
  845. if (!prep.ok) {
  846. fs.copyFileSync(templateAbsPath, templateInWork)
  847. try {
  848. const vrPath = path.join(workDir, 'vlm_roi.json')
  849. const vr = JSON.parse(fs.readFileSync(vrPath, 'utf8'))
  850. vr.template_vlm_preprocessed = false
  851. delete vr.template_preprocess_paths
  852. fs.writeFileSync(vrPath, JSON.stringify(vr, null, 2), 'utf8')
  853. } catch (_) {}
  854. fs.writeFileSync(path.join(workDir, 'template_preprocess_error.txt'), String(prep.error || ''), 'utf8')
  855. }
  856. const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
  857. if (!String(env.IMG_CENTER_NCC_MIN_SCORE || '').trim()) {
  858. env.IMG_CENTER_NCC_MIN_SCORE = String(IMG_CENTER_NCC_MIN_SCORE_DEFAULT)
  859. }
  860. if (process.platform === 'win32') {
  861. const pyDir = path.dirname(pythonPath)
  862. const pyRoot = path.dirname(path.dirname(pyDir))
  863. env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
  864. }
  865. const r = spawnSync(
  866. pythonPath,
  867. [orbScriptPath, '--work-dir', workDir],
  868. { encoding: 'utf-8', timeout: PYTHON_ORB_SCRIPT_TIMEOUT_MS, env, cwd: projectRoot }
  869. )
  870. if (r.status !== 0) {
  871. const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || 'NCC 匹配脚本失败'
  872. return { success: false, error: msg, workDir }
  873. }
  874. let out
  875. try {
  876. out = JSON.parse(r.stdout.trim())
  877. } catch (e) {
  878. return { success: false, error: `脚本输出非 JSON: ${(r.stdout || '').slice(0, 300)}`, workDir }
  879. }
  880. if (!out.success) return { success: false, error: out.error || '匹配失败', workDir }
  881. if (!waitUntilMatchedWritten(matchedPath, MATCHED_PNG_MAX_WAIT_MS)) {
  882. return { success: false, error: `Matched.png 未就绪: ${matchedPath}`, workDir }
  883. }
  884. return {
  885. success: true,
  886. coordinate: { x: out.x, y: out.y, width: out.width, height: out.height },
  887. clickPosition: { x: out.center_x, y: out.center_y },
  888. workDir,
  889. }
  890. }
  891. * ---------- 旧 runPipeline 结束 ----------
  892. */
  893. /**
  894. * press/locate:Electron 侧 await;返回 { success, coordinate?, clickPosition?, error? }
  895. * @param {string} [folderPath] 当前流程目录(如 .../static/process/GenerateNote),临时文件写入其下 tmp/
  896. */
  897. async function matchImageAndGetCoordinate (device, imagePath, folderPath) {
  898. const templatePath = path.isAbsolute(imagePath) ? imagePath : path.resolve(projectRoot, imagePath)
  899. const r = await runPipeline(device, templatePath, folderPath)
  900. if (!r.success) return { success: false, error: r.error }
  901. return {
  902. success: true,
  903. coordinate: r.coordinate,
  904. clickPosition: r.clickPosition,
  905. markedScreenshotPath: r.markedScreenshotPath || null,
  906. }
  907. }
  908. async function executeImgCenterPointLocation ({ device, template, folderPath }) {
  909. if (!device) return { success: false, error: '缺少设备 ID,无法自动获取截图' }
  910. if (!template || typeof template !== 'string') return { success: false, error: '缺少模板图片路径(inVars[0])' }
  911. const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot
  912. const isAbsoluteOrDrive = template.startsWith('/') || template.includes(':')
  913. const hasSubPath = template.includes('/') || template.includes(path.sep)
  914. const templatePath = isAbsoluteOrDrive ? template : (hasSubPath ? path.join(baseDir, template) : path.join(baseDir, 'resources', template))
  915. const result = await runPipeline(device, templatePath, folderPath)
  916. if (!result.success) return { success: false, error: result.error }
  917. const center = result.clickPosition || {
  918. x: result.coordinate.x + result.coordinate.width / 2,
  919. y: result.coordinate.y + result.coordinate.height / 2,
  920. }
  921. return {
  922. success: true,
  923. center,
  924. coordinate: result.coordinate,
  925. workDir: result.workDir,
  926. markedScreenshotPath: result.markedScreenshotPath || null,
  927. }
  928. }
  929. module.exports = {
  930. tagName,
  931. schema,
  932. executeImgCenterPointLocation,
  933. matchImageAndGetCoordinate,
  934. /** @deprecated 与 matchImageAndGetCoordinate 相同 */
  935. matchImageAndGetCoordinateAsync: matchImageAndGetCoordinate,
  936. /** 解析后的模型名 + 当前 ai 包 baseUrl(测试 / 调试) */
  937. getImgCenterModel,
  938. getImgCenterAiMeta,
  939. /** 中心点 VLM 实际调用链(主 + 备用,≤3) */
  940. getCenterPointVlmModelChain,
  941. }