| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035 |
- /**
- * fun 标签:img-center-point-location
- * 仅需 inVars[0]:模板路径。
- *
- * 【当前逻辑】tmp → ADB 截图 + 模板复制 → VLM img2text(主模型 + 最多 2 个备用模型,**共≤3 次**;
- * 仅在「请求失败」或「无有效中心点坐标」时换模型重试)→ 归一化转像素 →
- * 可选:Python 在截图上画**绿色圆圈**标出中心点,保存为 `screenshot_center_marked.png`(同一次 `img-center-时间戳` 目录)。
- *
- * 【已注释保留】原流程:VLM(ROI + template_crop / need_center_crop / template_scale)→ 外扩 roi →
- * Python 预处理 → NCC(img-center-orb-akaze.py)。恢复时取消下方对应块注释并改回 runPipeline 即可。
- *
- * 密钥与网关:优先根目录 config.js(openaiApiKey 等同步到环境变量),否则 nodejs/ai/config.js;模型常量见文件顶部。
- */
// ---------------------------------------------------------------------------
// Configuration: model ids and timeouts (edit here, or override through the
// matching environment variable / the root config.js)
// ---------------------------------------------------------------------------
/**
 * Primary multimodal model id for the img2text center-point request.
 * A non-empty string here is used as-is; with '' the id is taken from
 * process.env.IMG_CENTER_OPENAI_MODEL, then config.imgCenterOpenAiModel, and
 * finally whatever img2textRequest.resolveImgCenterModel falls back to.
 */
// const IMG_CENTER_OPENAI_MODEL = 'gemini-3.1-pro-preview'
const IMG_CENTER_OPENAI_MODEL = 'gpt-5.4'
/**
 * Fallback models for the center-point request. They are tried in order when
 * the previous model's request fails or its answer contains no usable center.
 * The chain is [primary, fallback 1, fallback 2], de-duplicated and truncated
 * to IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS entries.
 * '' skips a slot; the environment variables IMG_CENTER_FALLBACK_MODEL_1 /
 * IMG_CENTER_FALLBACK_MODEL_2 take precedence over these constants.
 *
 * Selection notes kept from the original author: Claude Sonnet is cheaper and
 * faster, Claude Opus is slightly stronger at image analysis, "-thinking"
 * variants are slower and costlier and rarely needed for this short JSON
 * answer. An OpenAI gateway may use e.g. gpt-4o / gpt-4.1 instead.
 */
const IMG_CENTER_FALLBACK_MODEL_1 = 'claude-opus-4-6'
const IMG_CENTER_FALLBACK_MODEL_2 = 'gemini-3.1-pro-preview'
/** Upper bound on img2text calls per lookup (primary + fallbacks after de-duplication). */
const IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS = 3
/**
 * Timeout (ms) of one vision API (img2text) request.
 * Override: IMG_CENTER_AI_TIMEOUT_MS; never lower than 10 s, default 300 s.
 */
const IMG_CENTER_AI_TIMEOUT_MS = (() => {
  const fromEnv = parseInt(String(process.env.IMG_CENTER_AI_TIMEOUT_MS || '').trim(), 10)
  return Math.max(10_000, fromEnv || 300_000)
})()
/**
 * Timeout (ms) of the Python script that draws the green center circle.
 * Override: IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS; never lower than 5 s, default 30 s.
 */
const IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS = (() => {
  const fromEnv = parseInt(String(process.env.IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS || '').trim(), 10)
  return Math.max(5000, fromEnv || 30_000)
})()
/** [Legacy NCC flow] Longest wait (ms) for Matched.png to be written. */
const MATCHED_PNG_MAX_WAIT_MS = 60_000
/** [Legacy NCC flow] Spawn timeout (ms) of the Python NCC matching script. */
const PYTHON_ORB_SCRIPT_TIMEOUT_MS = 120_000
/**
 * [Legacy NCC flow] Default minimum NCC score handed to Python
 * (environment variable IMG_CENTER_NCC_MIN_SCORE).
 */
const IMG_CENTER_NCC_MIN_SCORE_DEFAULT = 0.34
/**
 * [Legacy preprocessing] Spawn timeout (ms) of the template preprocessing script.
 * Override: IMG_CENTER_PREPROCESS_TIMEOUT_MS; never lower than 5 s, default 60 s.
 */
const PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS = (() => {
  const fromEnv = parseInt(String(process.env.IMG_CENTER_PREPROCESS_TIMEOUT_MS || '').trim(), 10)
  return Math.max(5000, fromEnv || 60_000)
})()
/**
 * Reads a floating-point setting from an environment variable and clamps it
 * into [min, max].
 *
 * Only an unparsable value (NaN) selects the fallback. The previous
 * `parseFloat(...) || fallback` form also discarded an explicit `0`, which
 * made e.g. IMG_CENTER_ROI_PAD=0 impossible although 0 is inside the range.
 *
 * @param {string} envName environment variable to read
 * @param {number} fallback value used when the variable is unset or not a number
 * @param {number} min lower clamp bound
 * @param {number} max upper clamp bound
 * @returns {number}
 */
function imgCenterEnvFloat (envName, fallback, min, max) {
  const parsed = parseFloat(String(process.env[envName] || '').trim())
  const value = Number.isNaN(parsed) ? fallback : parsed
  return Math.max(min, Math.min(max, value))
}
/**
 * [Legacy VLM ROI] Outward padding ratio applied to the ROI; range [0, 0.12],
 * default 0.03. Override: IMG_CENTER_ROI_PAD.
 */
const IMG_CENTER_ROI_PAD = imgCenterEnvFloat('IMG_CENTER_ROI_PAD', 0.03, 0, 0.12)
/**
 * [Legacy VLM ROI] Lower bound of the normalized ROI height; range
 * [0.06, 0.45], default 0.15. Override: IMG_CENTER_ROI_MIN_REL_H.
 */
const IMG_CENTER_ROI_MIN_REL_H = imgCenterEnvFloat('IMG_CENTER_ROI_MIN_REL_H', 0.15, 0.06, 0.45)
/**
 * [Legacy VLM ROI] Lower bound of the normalized ROI width. 0 (the default,
 * also used for unset / non-positive / unparsable values) means "not
 * enforced"; any positive value is clamped into [0.06, 0.95].
 * Override: IMG_CENTER_ROI_MIN_REL_W.
 */
const IMG_CENTER_ROI_MIN_REL_W = (() => {
  const v = parseFloat(String(process.env.IMG_CENTER_ROI_MIN_REL_W || '').trim())
  if (!Number.isFinite(v) || v <= 0) return 0
  return Math.max(0.06, Math.min(0.95, v))
})()
- // ---------------------------------------------------------------------------
- // 依赖与工程路径
- // ---------------------------------------------------------------------------
- const path = require('path')
- const fs = require('fs')
- const { spawnSync } = require('child_process')
- const { getPythonExeFromConfig } = require('../../../../python-exe-from-config.js')
// Application config.js: next to the directory that contains STATIC_ROOT when
// that variable is set, otherwise five directory levels above this file.
const configPath = path.join(
  process.env.STATIC_ROOT
    ? path.dirname(path.resolve(process.env.STATIC_ROOT))
    : path.join(__dirname, '..', '..', '..', '..', '..'),
  'config.js'
)
// A missing config file is tolerated and treated as an empty configuration.
const config = (() => {
  if (!fs.existsSync(configPath)) return {}
  return require(configPath)
})()
// Project root: config.projectRoot when it names an existing path, otherwise
// the directory that holds config.js.
const projectRoot = (() => {
  const configured = config.projectRoot
  if (configured && fs.existsSync(configured)) return configured
  return path.dirname(path.resolve(configPath))
})()
/**
 * Copies the application-level API key and gateway URL from `config` into
 * process.env so that code reading the environment (nodejs/ai is loaded after
 * the first call) can see them.
 *
 * - API_KEY is set from config.openaiApiKey || config.vlmApiKey, but only when
 *   API_KEY, OPENAI_API_KEY and VLM_API_KEY are all blank.
 * - BASE_URL is set from config.openaiApiUrl (trimmed, one trailing "/"
 *   removed), but only when BASE_URL and OPENAI_API_URL are both blank.
 */
function syncProcessEnvFromAppConfig () {
  const envIsBlank = (name) => !String(process.env[name] || '').trim()

  const apiKey = config.openaiApiKey || config.vlmApiKey
  if (apiKey && ['API_KEY', 'OPENAI_API_KEY', 'VLM_API_KEY'].every((name) => envIsBlank(name))) {
    process.env.API_KEY = String(apiKey).trim()
  }

  const gatewayUrl = config.openaiApiUrl
  if (gatewayUrl && ['BASE_URL', 'OPENAI_API_URL'].every((name) => envIsBlank(name))) {
    process.env.BASE_URL = String(gatewayUrl).trim().replace(/\/$/, '')
  }
}
- syncProcessEnvFromAppConfig()
- const aiRoot = path.join(__dirname, '..', '..', '..', '..', 'ai')
- const aiModule = require(path.join(aiRoot, 'ai.js'))
- const img2textRequest = require(path.join(aiRoot, 'request', 'img2text.js'))
- const aiPackageConfig = require(path.join(aiRoot, 'config.js'))
/**
 * Python helper scripts under <projectRoot>/python/scripts:
 * - orbScriptPath / preprocessTemplateScriptPath: legacy NCC matching and
 *   template preprocessing (only referenced by the commented-out old flow);
 * - markCenterScriptPath: draws the green circle on the screenshot at the
 *   center reported by the VLM.
 */
const [orbScriptPath, preprocessTemplateScriptPath, markCenterScriptPath] = [
  'img-center-orb-akaze.py',
  'img-center-preprocess-template.py',
  'img-center-mark-center.py',
].map((scriptName) => path.join(projectRoot, 'python', 'scripts', scriptName))
/**
 * [Current flow] Center-point-only prompt. Image 1 is the screenshot, image 2
 * the template; the model is asked for center_rx / center_ry in [0,1]
 * relative to the width / height of image 1, or null for both when the
 * template cannot be found. The text is sent to the model verbatim.
 */
const VLM_CENTER_ONLY_PROMPT = [
  '你收到两张图,顺序固定:',
  '图1:Android 手机完整截图(与 adb screencap 一致),逻辑像素宽约 W、高约 H;坐标原点在左上角,x 向右、y 向下。',
  '图2:模板图。请在图1 中找到与图2 视觉上对应的同一区域(同一图标、缩略图格子、按钮等)。',
  '任务:给出图2 在图1 中**匹配可见区域的几何中心点**(该区域中心,不是图2 整张文件的画布中心)。',
  '只输出一个 JSON 对象,必须包含:',
  '- "center_rx"、"center_ry":数字,取值在 [0,1],分别为该中心点在图1 上相对宽度 W、高度 H 的归一化坐标(左缘=0,上缘=0)。',
  '若图1 中完全无法对应图2,两个键均填 null。',
  '禁止 markdown、禁止代码围栏、禁止 JSON 以外的任何文字。',
].join('\n')
- /*
- * ---------- 旧 ROI + 模板几何 VLM 提示(NCC 流程用,保留勿删)----------
- const VLM_USER_PROMPT = `你收到两张图,顺序固定:
- 图1:Android 手机竖屏完整截图,逻辑像素宽 W、高 H。
- 图2:模板图——要在图1画面里定位的同一内容。常见情况:图2 是 PC/自动化侧通过 ADB 推到手机上的**原始文件**(与磁盘上打开的同一像素内容),**不是**相册 App 已处理后的内部版本。图1 里若出现该图,往往是**系统相册/图库**里的**网格缩略图**:会先对原图做**缩小**,且方格多为**近似正方形**,竖图/横图常被**居中裁成方图**再显示,与图2 原始长宽比可能完全不同。
- 任务:在图1中找到与图2视觉上对应的那一块区域(同一控件、同一相册格里的缩略图等),给出「搜索矩形」roi_hint;并在适用时给出 template_crop、template_scale、need_center_crop,使下游能**复现相册里那一格的「裁切 + 缩放」**,用于模板匹配。
- 硬性规则(roi_hint):
- - roi_hint 的四个数必须是相对图1 的归一化坐标:rx0,ry0 为矩形左上角,rx1,ry1 为右下角,均在 [0,1],且 rx0<rx1、ry0<ry1。
- - 【只框图2】图2 里**只有**某一图标/按钮时,roi 应主要覆盖图1里**与之对应的那一块**,不要为了「多装点内容」而把**竖直方向**上、与图2无关的相邻 Tab 图标(例如在图1里与目标**横向并排**的其它底栏图标)一起框进竖长条;底栏场景下各图标是**横排**的,roi 应是**偏横向的条带**包住目标及窄边距,而不是上下堆叠多个无关图标。
- - 【必须完整】图2 模板在图1里所对应的那一整块 UI(含圆角、描边、阴影等可见像素)必须**全部**落在 roi 矩形内部,**任何一边都不得裁切**到模板上的图形;若宁可 roi 明显大一点也要保证完整。
- - 【自检】若你意识到按当前四个数裁图会「切掉」图2 上任意可见部分(例如只框到红色按钮的下半截、圆角被切、+ 号缺一截),**必须**把 ry0 上移或 ry1 下移、或放宽 rx,直到不会裁切。
- - 【底部栏】目标在屏底导航栏时:roi 的纵向高度 (ry1−ry0) 建议至少为屏高的 **14%~22%**,且 ry0 要足够靠上,使整块圆形/圆角按钮(含完整外轮廓)都在框内;**禁止**高度小于屏高 **12%** 的扁条。横向 (rx1−rx0) 以刚好包住目标按钮宽度 + 左右各约 5%~15% 屏宽为宜。
- - 【可大一些】在已完整包含上述目标的前提下,roi 宁可略大勿小:各方向外扩边距避免贴边裁切;图2 近似正方形时,roi **勿**做成「竖远长于横」的窄竖条(除非图2 本身就是竖长条)。
- - 若图1中存在多处相似元素,选择与图2内容最一致、最可能是用户意图的一处;若完全无法对应,四个坐标全填 null。
- template_scale(模拟相册把图缩小进格子的比例):
- - 数字,范围建议 0.05~1.0,表示在**已按下方顺序处理完 template_crop 与 need_center_crop 之后**,对图2 再做**等比线性缩放**(宽、高同比例);1 表示不在此步缩小。
- - 估法:对比**图1 里目标格中的缩略图**与**图2 原始文件**——若屏上格子里的内容明显是「整图缩小后的局部/整体」,应给 **小于 1**(相册格常见约 **0.2~0.5**,视分辨率与格大小而定);若图1 里几乎 1:1 对应原图2 像素内容则接近 1。
- - **不要**把 template_scale 理解成随意数字;应绑定:**对齐「ADB 原图 → 相册网格里显示尺寸」的缩放**。
- template_crop(相对**原始图2** 宽高的归一化矩形 cx0,cy0,cx1,cy1 ∈ [0,1],cx0<cx1、cy0<cy1):
- - **相册/多宫格场景**:表示「图1 里那一格缩略图**所对应的原图2 上的可见区域**」——即:若把原图2 按相册逻辑裁切后才会得到与格内一致的画面,应用此矩形框出原图2 上的该区域。竖长图在方格里通常只显示**中间一条/一块**,此时 crop 应是**接近正方形或略竖/略横的矩形**,**不要**默认填 0,0,1,1 除非图1 明确显示的是「整图缩进格内、无裁切」。
- - **非相册场景**(图标、按钮、整段 UI):可表示去掉白边、只保留主体;无需裁切则 0,0,1,1。
- - 若你能较准给出「格内所见 ↔ 原图2」的对应关系,应优先给出**非全图**的 template_crop;与 need_center_crop 配合见下。
- need_center_crop:
- - 当图1 明显是「相册/多宫格选图」、缩略图为**方格**且图2 与格内显示的长宽比不一致(典型:竖图进方格)时为 **true**;否则 **false**。
- - 为 true 时:roi_hint 仍只框**与图2 对应的那一格**(含格线外极少边距)。下游会在 template_crop 结果上再作**居中取最大正方形**,以逼近系统相册方格裁切;因此若你已在 template_crop 里给出了**精确的方格可见区域**(本身已接近正方形),可将 need_center_crop 设为 **false**,避免几何重复。
- - 若相册场景下**无法**可靠估计 template_crop,可 **template_crop 填 0,0,1,1** 且 **need_center_crop 为 true**,由程序用「整图居中裁方」兜底。
- confidence:0~1,可随意填;下游**不使用**该字段做拒识,仅作记录。
- 只输出一个 JSON 对象,顶层键必须包含:roi_hint、need_center_crop、confidence、template_scale、template_crop。
- roi_hint 为对象,键 "rx0","ry0","rx1","ry1"(数字或 null)。
- template_crop 为对象,键 "cx0","cy0","cx1","cy1"(数字)。
- 禁止 markdown、禁止代码围栏、禁止 JSON 以外的任何文字。`
- * ---------- 旧 VLM_USER_PROMPT 结束 ----------
- */
/** Tag name under which this handler is registered. */
const tagName = 'img-center-point-location'
/**
 * Descriptor of the tag: a human-readable description plus the meaning of
 * its input and output variables. The strings are shown as-is.
 */
const schema = {
  description:
    '在屏幕截图中查找模板并返回中心点;主模型无效坐标时自动换备用模型(最多 3 次)。成功时保存 screenshot_center_marked.png。原 ROI+预处理+NCC 已注释保留。',
  inputs: {
    template: '模板图片路径(inVars[0])',
    variable: '输出变量名(outVars)',
  },
  outputs: {
    variable: '中心点 {x,y}(对象)',
  },
}
- // ---------------------------------------------------------------------------
- // 以下为 runPipeline 主流程中的调用顺序(自上而下与执行顺序一致)
- // ---------------------------------------------------------------------------
/**
 * Returns the `tmp` directory of a workflow folder, e.g.
 * static/process/GenerateNote/tmp.
 *
 * @param {string} [folderPath] workflow folder; a relative path is resolved
 *   against projectRoot. Anything that is not a non-empty string selects
 *   `<projectRoot>/tmp`.
 * @returns {string} path of the tmp directory (it is not created here)
 */
function resolveWorkflowTmpRoot (folderPath) {
  const hasFolder = typeof folderPath === 'string' && folderPath !== ''
  if (!hasFolder) return path.join(projectRoot, 'tmp')

  const workflowDir = path.isAbsolute(folderPath)
    ? folderPath
    : path.join(projectRoot, folderPath)
  return path.join(workflowDir, 'tmp')
}
/**
 * Returns the adb executable to use.
 *
 * config.adbPath.path wins when set (a relative value is resolved against
 * projectRoot); otherwise the bundled binary under
 * <projectRoot>/lib/scrcpy-adb is used (`adb.exe` on Windows, `adb` elsewhere).
 *
 * @returns {string}
 */
function getAdbPath () {
  const configured = config.adbPath?.path
  if (!configured) {
    const binaryName = process.platform === 'win32' ? 'adb.exe' : 'adb'
    return path.join(projectRoot, 'lib', 'scrcpy-adb', binaryName)
  }
  if (path.isAbsolute(configured)) return configured
  return path.resolve(projectRoot, configured)
}
/**
 * Captures the device screen with `adb exec-out screencap -p` and stores the
 * PNG at outFile (parent directories are created).
 *
 * The call is synchronous, so a timeout is applied: without one a stuck adb
 * daemon or unresponsive device would block the Node process indefinitely.
 * The captured bytes must start with the PNG signature; arbitrary text that
 * adb printed on stdout is therefore never saved as a screenshot.
 *
 * @param {string} adbPath adb executable
 * @param {string} device device serial passed to `adb -s`
 * @param {string} outFile destination PNG path
 * @param {number} [timeoutMs=30000] maximum time adb may run
 * @returns {boolean} true when a PNG was written, false on any failure
 */
function adbScreencapPng (adbPath, device, outFile, timeoutMs = 30_000) {
  const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
  const r = spawnSync(adbPath, ['-s', device, 'exec-out', 'screencap', '-p'], {
    encoding: 'buffer',
    maxBuffer: 40 * 1024 * 1024,
    timeout: timeoutMs,
    windowsHide: true,
  })
  // A spawn failure or timeout leaves status null, which also fails this check.
  if (r.status !== 0) return false
  const png = r.stdout
  if (!png || png.length < 100) return false
  if (!png.subarray(0, PNG_SIGNATURE.length).equals(PNG_SIGNATURE)) return false
  fs.mkdirSync(path.dirname(outFile), { recursive: true })
  fs.writeFileSync(outFile, png)
  return true
}
/**
 * Reads an image file and returns it as a base64 `data:` URL.
 *
 * The media type is derived from the file's magic bytes (JPEG, GIF, WebP) and
 * defaults to image/png. The template is copied into the work directory as
 * `template.png` whatever its real format, and a media type that contradicts
 * the bytes can be rejected by strict vision APIs.
 *
 * @param {string} absPath image file to read
 * @returns {string} `data:<mime>;base64,<payload>`
 * @throws when the file cannot be read
 */
function fileToDataUrlPng (absPath) {
  const bytes = fs.readFileSync(absPath)
  const hasBytesAt = (signature, offset = 0) =>
    bytes.length >= offset + signature.length &&
    signature.every((byte, i) => bytes[offset + i] === byte)

  let mime = 'image/png'
  if (hasBytesAt([0xff, 0xd8, 0xff])) {
    mime = 'image/jpeg'
  } else if (hasBytesAt([0x47, 0x49, 0x46, 0x38])) {
    mime = 'image/gif' // "GIF8"
  } else if (hasBytesAt([0x52, 0x49, 0x46, 0x46]) && hasBytesAt([0x57, 0x45, 0x42, 0x50], 8)) {
    mime = 'image/webp' // "RIFF" .... "WEBP"
  }
  return `data:${mime};base64,${bytes.toString('base64')}`
}
/**
 * Extracts a JSON object from the text returned by a model.
 *
 * Steps:
 * 1. when the text contains a ``` / ```json fence, only its content is used;
 * 2. the span from the first "{" to the last "}" is parsed;
 * 3. if that fails (e.g. the model appended a remark that itself contains
 *    braces, or emitted several objects), every "{" is tried as the start of
 *    a brace-balanced object and the first one that parses wins.
 *
 * @param {*} text raw model output (coerced to string; falsy becomes '')
 * @returns {object|null} the parsed object, or null when none can be found
 */
function parseVlmJson (text) {
  let s = String(text || '').trim()
  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/i)
  if (fence) s = fence[1].trim()

  const tryParseObject = (candidate) => {
    try {
      const value = JSON.parse(candidate)
      return value !== null && typeof value === 'object' ? value : null
    } catch (_) {
      return null
    }
  }

  const greedy = s.match(/\{[\s\S]*\}/)
  if (!greedy) return null
  const whole = tryParseObject(greedy[0])
  if (whole) return whole

  // Index of the "}" closing the object opened at `from`, or -1. Braces inside
  // JSON string literals are ignored.
  const findObjectEnd = (from) => {
    let depth = 0
    let inString = false
    let escaped = false
    for (let i = from; i < s.length; i++) {
      const ch = s[i]
      if (inString) {
        if (escaped) escaped = false
        else if (ch === '\\') escaped = true
        else if (ch === '"') inString = false
        continue
      }
      if (ch === '"') {
        inString = true
      } else if (ch === '{') {
        depth++
      } else if (ch === '}') {
        depth--
        if (depth === 0) return i
      }
    }
    return -1
  }

  for (let start = s.indexOf('{'); start !== -1; start = s.indexOf('{', start + 1)) {
    const end = findObjectEnd(start)
    if (end === -1) continue
    const parsed = tryParseObject(s.slice(start, end + 1))
    if (parsed) return parsed
  }
  return null
}
/**
 * Reads the pixel dimensions of a PNG from its IHDR chunk
 * (`adb screencap -p` produces PNG).
 *
 * Only the first 24 bytes are read: the 8-byte PNG signature, the 4-byte
 * chunk length, the chunk type "IHDR", then width and height as big-endian
 * uint32 at offsets 16 and 20. The whole signature and the chunk type are
 * verified, a short read is rejected, and the descriptor is closed on every
 * path.
 *
 * @param {string} absPath PNG file
 * @returns {{ width: number, height: number } | null} null when the file is
 *   unreadable, too short, not a PNG, or reports a zero dimension
 */
function readPngIhdrDimensions (absPath) {
  const HEADER_BYTES = 24
  const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
  let fd = null
  try {
    fd = fs.openSync(absPath, 'r')
    const header = Buffer.alloc(HEADER_BYTES)
    const bytesRead = fs.readSync(fd, header, 0, HEADER_BYTES, 0)
    if (bytesRead < HEADER_BYTES) return null
    if (!header.subarray(0, PNG_SIGNATURE.length).equals(PNG_SIGNATURE)) return null
    if (header.toString('latin1', 12, 16) !== 'IHDR') return null
    const width = header.readUInt32BE(16)
    const height = header.readUInt32BE(20)
    if (width < 1 || height < 1) return null
    return { width, height }
  } catch (_) {
    return null
  } finally {
    if (fd !== null) {
      try {
        fs.closeSync(fd)
      } catch (_) {
        // Nothing useful can be done if closing fails.
      }
    }
  }
}
/**
 * Converts a value to a number clamped into [0, 1].
 *
 * Only numbers and non-blank numeric strings are accepted. Other inputs
 * (null, undefined, '', booleans, arrays, objects) yield null; plain
 * `Number(x)` coercion would turn '' / false / [] into 0 and true into 1, so
 * a "not found" placeholder from the model would pass as a real coordinate.
 *
 * @param {*} x candidate value
 * @returns {number|null} clamped number, or null when x is not a finite number
 */
function clamp01 (x) {
  let n
  if (typeof x === 'number') {
    n = x
  } else if (typeof x === 'string' && x.trim() !== '') {
    n = Number(x)
  } else {
    return null
  }
  if (!Number.isFinite(n)) return null
  return Math.max(0, Math.min(1, n))
}
/**
 * Derives the normalized center from the JSON object returned by the VLM.
 *
 * Key pairs are tried in this order; the first pair where both values are
 * usable wins:
 * 1. center_rx / center_ry, clamped into [0,1];
 * 2. cx / cy, clamped into [0,1];
 * 3. center_x / center_y: when width and height are positive and either
 *    value exceeds 1 they are treated as pixels and divided by width /
 *    height; otherwise they are treated as already normalized.
 *
 * NOTE(review): out-of-range values in center_rx / center_ry and cx / cy are
 * clamped rather than rejected, so pixel values in those keys end up as 1.
 *
 * @param {object} parsed parsed model answer
 * @param {number} width screenshot width in pixels
 * @param {number} height screenshot height in pixels
 * @returns {{ center_rx: number, center_ry: number } | null}
 */
function parseCenterNormalizedFromVlm (parsed, width, height) {
  if (!parsed || typeof parsed !== 'object') return null

  const asCenter = (rx, ry) => (rx != null && ry != null ? { center_rx: rx, center_ry: ry } : null)

  for (const [xKey, yKey] of [['center_rx', 'center_ry'], ['cx', 'cy']]) {
    const hit = asCenter(clamp01(parsed[xKey]), clamp01(parsed[yKey]))
    if (hit) return hit
  }

  const rawX = Number(parsed.center_x)
  const rawY = Number(parsed.center_y)
  if (!(Number.isFinite(rawX) && Number.isFinite(rawY))) return null

  const looksLikePixels = width > 0 && height > 0 && (rawX > 1 || rawY > 1)
  if (looksLikePixels) {
    const fromPixels = asCenter(clamp01(rawX / width), clamp01(rawY / height))
    if (fromPixels) return fromPixels
  }
  return asCenter(clamp01(rawX), clamp01(rawY))
}
/**
 * Resolves the model name sent to the API.
 *
 * The first non-blank value of the IMG_CENTER_OPENAI_MODEL constant,
 * process.env.IMG_CENTER_OPENAI_MODEL and config.imgCenterOpenAiModel is
 * handed to img2textRequest.resolveImgCenterModel; when all are blank that
 * function receives undefined and decides the default itself.
 *
 * @returns {*} whatever img2textRequest.resolveImgCenterModel returns
 */
function getImgCenterModel () {
  let explicit = String(IMG_CENTER_OPENAI_MODEL || '').trim()
  if (!explicit) {
    explicit = String(process.env.IMG_CENTER_OPENAI_MODEL || '').trim()
  }
  if (!explicit && config.imgCenterOpenAiModel) {
    explicit = String(config.imgCenterOpenAiModel).trim()
  }
  return img2textRequest.resolveImgCenterModel(explicit === '' ? undefined : explicit)
}
/**
 * Returns the model for the first request, which must go through the
 * OpenAI-compatible gateway.
 *
 * When the resolved model is "doubao" (case-insensitive) it is replaced by
 * aiPackageConfig.IMG_CENTER_MODEL, or by 'gpt-5.4' if that value is blank or
 * is itself "doubao". Any other resolved model is returned unchanged.
 *
 * @returns {*} model id for the first attempt
 */
function getPrimaryOpenAiImgCenterModel () {
  const resolved = getImgCenterModel()
  const isDoubao = Boolean(resolved) && String(resolved).toLowerCase() === 'doubao'
  if (!isDoubao) return resolved

  const packaged = String(aiPackageConfig.IMG_CENTER_MODEL || '').trim()
  if (packaged && packaged.toLowerCase() !== 'doubao') return packaged
  return 'gpt-5.4'
}
/**
 * Resolves one fallback model id: a non-blank environment variable wins over
 * the built-in constant.
 *
 * @param {string} envKey name of the environment variable to consult
 * @param {*} constVal built-in value; null / undefined count as ''
 * @returns {string} trimmed model id, '' when neither source provides one
 */
function resolveFallbackCenterModelId (envKey, constVal) {
  const fromEnv = String(process.env[envKey] || '').trim()
  if (fromEnv !== '') return fromEnv
  return String(constVal ?? '').trim()
}
/**
 * Builds the ordered list of models for the center-point request:
 * [primary, fallback 1, fallback 2] with blank ids dropped, duplicates
 * removed (first occurrence kept, comparison is case-sensitive) and the
 * result cut to IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS entries.
 *
 * @returns {string[]}
 */
function getCenterPointVlmModelChain () {
  const candidates = [
    getPrimaryOpenAiImgCenterModel(),
    resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1),
    resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2),
  ]
  // A Set keeps insertion order, so the first occurrence of each id survives.
  const uniqueIds = new Set(
    candidates.map((id) => String(id || '').trim()).filter(Boolean)
  )
  return [...uniqueIds].slice(0, IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS)
}
/**
 * Collects the effective AI settings of this tag: resolved model, model of
 * the first attempt, the full attempt chain, both fallback ids, the gateway
 * base URL and whether an API key is configured (the key itself is not
 * included).
 *
 * @returns {{ model: *, primaryOpenAiModel: *, centerPointVlmModelChain: string[],
 *   fallbackModel1: string, fallbackModel2: string, baseUrl: *, openAiKeyConfigured: boolean }}
 */
function getImgCenterAiMeta () {
  const model = getImgCenterModel()
  const primaryOpenAiModel = getPrimaryOpenAiImgCenterModel()
  const centerPointVlmModelChain = getCenterPointVlmModelChain()
  const fallbackModel1 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1)
  const fallbackModel2 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2)
  const baseUrl = aiPackageConfig.BASE_URL
  const apiKey = aiPackageConfig.API_KEY
  return {
    model,
    primaryOpenAiModel,
    centerPointVlmModelChain,
    fallbackModel1,
    fallbackModel2,
    baseUrl,
    openAiKeyConfigured: Boolean(apiKey && String(apiKey).trim()),
  }
}
/**
 * [Current flow] Performs one img2text request with the given model and
 * parses the answer as JSON. Every completed request is logged to
 * `openai_raw_attempt_{attemptIndex}.json` in workDir; an exception is logged
 * to `openai_error_attempt_{attemptIndex}.txt`. Both log writes are
 * best-effort and never change the returned result.
 *
 * The function never rejects: unreadable image files, a missing key or model
 * and transport errors are all reported as `{ ok: false, error }`. The images
 * are read only after the key and model have been validated.
 *
 * @param {string} workDir directory receiving the attempt logs
 * @param {string} screenshotPath screenshot image (sent as image 1)
 * @param {string} templatePath template image (sent as image 2)
 * @param {string} modelName model id for this attempt
 * @param {number} attemptIndex index used in the log file names
 * @returns {Promise<{ ok: boolean, data?: object, error?: string, model: string, rawResp?: object }>}
 */
async function callOpenAiCenterPointWithModel (workDir, screenshotPath, templatePath, modelName, attemptIndex) {
  syncProcessEnvFromAppConfig()
  const openAiKey = String(aiPackageConfig.API_KEY || '').trim()
  const model = String(modelName || '').trim()
  try {
    if (!openAiKey) {
      return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', model }
    }
    if (!model) {
      return { ok: false, error: '模型 id 为空', model }
    }
    // Read inside the try block so that an unreadable file becomes a normal
    // failure result instead of a rejected promise.
    const imageUrls = [fileToDataUrlPng(screenshotPath), fileToDataUrlPng(templatePath)]
    const result = await aiModule.run('img2text', VLM_CENTER_ONLY_PROMPT, imageUrls, {
      timeoutMs: IMG_CENTER_AI_TIMEOUT_MS,
      model,
    })
    const resp = result.data
    const attemptPayload = {
      model,
      attemptIndex,
      httpSuccess: result.success,
      httpError: result.success ? null : (result.error || null),
      response: resp,
    }
    try {
      fs.writeFileSync(
        path.join(workDir, `openai_raw_attempt_${attemptIndex}.json`),
        JSON.stringify(attemptPayload, null, 2),
        'utf8'
      )
    } catch (_) {
      // Diagnostic log only; a usable model answer must not be discarded
      // because the log could not be written.
    }
    if (!result.success) {
      return { ok: false, error: result.error || 'VLM 请求失败', model }
    }
    const content = resp?.choices?.[0]?.message?.content
    const parsed = parseVlmJson(content)
    if (!parsed || typeof parsed !== 'object') {
      return { ok: false, error: '无法解析模型返回为 JSON', model, rawResp: resp }
    }
    return { ok: true, data: parsed, model, rawResp: resp }
  } catch (e) {
    const msg = e && e.message ? e.message : String(e)
    try {
      fs.writeFileSync(path.join(workDir, `openai_error_attempt_${attemptIndex}.txt`), msg, 'utf8')
    } catch (_) {
      // Best-effort error log.
    }
    return { ok: false, error: msg, model }
  }
}
- /*
- * ---------- 旧 callOpenAiRoi(ROI + 模板几何,NCC 流程用,保留勿删)----------
- async function callOpenAiRoi (workDir, screenshotPath, templatePath) {
- syncProcessEnvFromAppConfig()
- const openAiKey = String(aiPackageConfig.API_KEY || '').trim()
- const screenUrl = fileToDataUrlPng(screenshotPath)
- const tplUrl = fileToDataUrlPng(templatePath)
- const imageUrls = [screenUrl, tplUrl]
- const emptyFallback = {
- roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 },
- need_center_crop: false,
- confidence: 0,
- template_scale: 1,
- template_crop: { cx0: 0, cy0: 0, cx1: 1, cy1: 1 },
- template_vlm_preprocessed: false,
- }
- try {
- if (!openAiKey) {
- return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', fallback: emptyFallback }
- }
- const openAiModel = getPrimaryOpenAiImgCenterModel()
- const result = await aiModule.run('img2text', VLM_USER_PROMPT, imageUrls, {
- timeoutMs: IMG_CENTER_AI_TIMEOUT_MS,
- model: openAiModel,
- })
- if (!result.success) {
- return { ok: false, error: result.error || 'VLM 请求失败', fallback: emptyFallback }
- }
- const resp = result.data
- const content = resp?.choices?.[0]?.message?.content
- const parsed = parseVlmJson(content)
- if (!parsed || typeof parsed !== 'object') {
- return { ok: false, error: '无法解析模型返回为 JSON', fallback: emptyFallback }
- }
- fs.writeFileSync(path.join(workDir, 'openai_raw.json'), JSON.stringify(resp, null, 2), 'utf8')
- return { ok: true, data: parsed }
- } catch (e) {
- const msg = e && e.message ? e.message : String(e)
- return { ok: false, error: msg, fallback: emptyFallback }
- }
- }
- * ---------- 旧 callOpenAiRoi 结束 ----------
- */
- /*
- * ---------- 旧 normalizeTemplateGeometry / normalizeVlmPayload / expandRoiHintNormalized / isFullScreenRoiHint(保留勿删)----------
- function normalizeTemplateGeometry (obj) {
- const c = obj && obj.template_crop
- let cx0 = 0
- let cy0 = 0
- let cx1 = 1
- let cy1 = 1
- if (c && typeof c === 'object') {
- const n = (k, d) => {
- const v = Number(c[k])
- return Number.isFinite(v) ? Math.max(0, Math.min(1, v)) : d
- }
- cx0 = n('cx0', 0)
- cy0 = n('cy0', 0)
- cx1 = n('cx1', 1)
- cy1 = n('cy1', 1)
- if (cx1 <= cx0) {
- cx0 = 0
- cx1 = 1
- }
- if (cy1 <= cy0) {
- cy0 = 0
- cy1 = 1
- }
- }
- let sc = Number(obj && obj.template_scale)
- if (!Number.isFinite(sc) || sc <= 0) sc = 1
- sc = Math.max(0.05, Math.min(1, sc))
- return {
- template_crop: { cx0, cy0, cx1, cy1 },
- template_scale: sc,
- }
- }
- function normalizeVlmPayload (obj) {
- const rh = obj.roi_hint || {}
- const nums = ['rx0', 'ry0', 'rx1', 'ry1']
- let bad = false
- for (const k of nums) {
- const v = rh[k]
- if (v === null || v === undefined) bad = true
- }
- const tg = normalizeTemplateGeometry(obj)
- if (bad) {
- return {
- roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 },
- need_center_crop: false,
- confidence: 0,
- template_scale: tg.template_scale,
- template_crop: tg.template_crop,
- template_vlm_preprocessed: false,
- }
- }
- const c = Number(obj.confidence)
- const conf = Number.isFinite(c) ? Math.max(0, Math.min(1, c)) : 0
- return {
- roi_hint: {
- rx0: Number(rh.rx0),
- ry0: Number(rh.ry0),
- rx1: Number(rh.rx1),
- ry1: Number(rh.ry1),
- },
- need_center_crop: !!obj.need_center_crop,
- confidence: conf,
- template_scale: tg.template_scale,
- template_crop: tg.template_crop,
- template_vlm_preprocessed: false,
- }
- }
- function expandRoiHintNormalized (rh) {
- let rx0 = Number(rh.rx0)
- let ry0 = Number(rh.ry0)
- let rx1 = Number(rh.rx1)
- let ry1 = Number(rh.ry1)
- if (!(rx1 > rx0 && ry1 > ry0)) return rh
- const pad = IMG_CENTER_ROI_PAD
- rx0 = Math.max(0, rx0 - pad)
- ry0 = Math.max(0, ry0 - pad)
- rx1 = Math.min(1, rx1 + pad)
- ry1 = Math.min(1, ry1 + pad)
- let h = ry1 - ry0
- const hMin = IMG_CENTER_ROI_MIN_REL_H
- if (h < hMin) {
- const deficit = hMin - h
- const bottomAnchored = ry1 >= 0.88
- if (bottomAnchored) {
- ry0 = Math.max(0, ry0 - deficit)
- h = ry1 - ry0
- if (h < hMin) {
- ry1 = Math.min(1, ry0 + hMin)
- h = ry1 - ry0
- if (h < hMin) ry0 = Math.max(0, ry1 - hMin)
- }
- } else {
- const cy = (ry0 + ry1) / 2
- ry0 = Math.max(0, cy - hMin / 2)
- ry1 = Math.min(1, ry0 + hMin)
- if (ry1 >= 1 - 1e-9) ry0 = Math.max(0, 1 - hMin)
- }
- }
- const wMin = IMG_CENTER_ROI_MIN_REL_W
- if (wMin > 0) {
- let w = rx1 - rx0
- if (w < wMin) {
- const cx = (rx0 + rx1) / 2
- rx0 = Math.max(0, cx - wMin / 2)
- rx1 = Math.min(1, rx0 + wMin)
- if (rx1 >= 1 - 1e-9) rx0 = Math.max(0, 1 - wMin)
- }
- }
- return { rx0, ry0, rx1, ry1 }
- }
- function isFullScreenRoiHint (rh) {
- return (
- Math.abs(Number(rh.rx0)) < 1e-9 &&
- Math.abs(Number(rh.ry0)) < 1e-9 &&
- Math.abs(Number(rh.rx1) - 1) < 1e-9 &&
- Math.abs(Number(rh.ry1) - 1) < 1e-9
- )
- }
- * ---------- 旧几何归一化结束 ----------
- */
- /*
- * ---------- 旧 getPythonPath / runTemplatePreprocess / waitUntilMatchedWritten(保留勿删)----------
- function getPythonPath () {
- const base = config.pythonPath?.path || config.pythonVenvPath || path.join(projectRoot, 'python', process.arch === 'arm64' ? 'arm64' : 'x64')
- const envPy = path.join(base, 'env', 'Scripts', 'python.exe')
- const scriptsPy = path.join(base, 'Scripts', 'python.exe')
- const pyEmbedded = path.join(base, 'py', 'python.exe')
- if (fs.existsSync(envPy)) return envPy
- if (fs.existsSync(scriptsPy)) return scriptsPy
- if (fs.existsSync(pyEmbedded)) return pyEmbedded
- return 'python'
- }
- function runTemplatePreprocess (pythonPath, templateAbsPath, workDir) {
- if (!fs.existsSync(preprocessTemplateScriptPath)) {
- return { ok: false, error: `未找到 ${preprocessTemplateScriptPath}` }
- }
- const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
- if (process.platform === 'win32') {
- const pyDir = path.dirname(pythonPath)
- const pyRoot = path.dirname(path.dirname(pyDir))
- env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
- }
- const r = spawnSync(
- pythonPath,
- [preprocessTemplateScriptPath, '--src', templateAbsPath, '--work-dir', workDir],
- { encoding: 'utf-8', timeout: PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS, env, cwd: projectRoot, windowsHide: true }
- )
- if (r.status !== 0) {
- const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || '模板预处理失败'
- return { ok: false, error: msg }
- }
- try {
- const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || ''
- const j = JSON.parse(tail)
- if (!j.success) return { ok: false, error: j.error || '模板预处理失败' }
- return { ok: true, meta: j }
- } catch (e) {
- return { ok: false, error: '模板预处理输出非 JSON' }
- }
- }
- function waitUntilMatchedWritten (absPath, maxMs) {
- const matchedWaitMs =
- maxMs != null
- ? maxMs
- : (process.env.IMG_MATCH_MATCHED_WAIT_MS
- ? Math.max(5000, parseInt(process.env.IMG_MATCH_MATCHED_WAIT_MS, 10) || 30000)
- : 30000)
- if (!absPath) return true
- const t0 = Date.now()
- let lastSize = -1
- let stableStart = 0
- const STABLE_MS = 120
- while (Date.now() - t0 < matchedWaitMs) {
- try {
- if (fs.existsSync(absPath)) {
- const st = fs.statSync(absPath)
- if (st.size >= 32) {
- if (st.size === lastSize) {
- if (Date.now() - stableStart >= STABLE_MS) return true
- } else {
- lastSize = st.size
- stableStart = Date.now()
- }
- }
- }
- } catch (_) {}
- const until = Date.now() + 35
- while (Date.now() < until) {}
- }
- try {
- return fs.existsSync(absPath) && fs.statSync(absPath).size >= 32
- } catch (_) {
- return false
- }
- }
- * ---------- 旧 getPythonPath / 预处理 / Matched 等待结束 ----------
- */
/**
 * Returns the Python interpreter to use, as chosen by
 * getPythonExeFromConfig for the loaded application config.
 */
function resolvePythonExecutable () {
  const pythonExe = getPythonExeFromConfig(config)
  return pythonExe
}
/**
 * Marks the center point on the screenshot with a hollow green circle by
 * running the Python mark script; the result is written to
 * `<workDir>/screenshot_center_marked.png`.
 *
 * The radius can be forced with the environment variable
 * IMG_CENTER_MARK_RADIUS (positive integer, pixels); otherwise no `--radius`
 * argument is passed and the script chooses one.
 *
 * When the interpreter cannot be started or the script times out, the spawn
 * error message is included in the returned error instead of only the
 * generic failure text.
 *
 * @param {string} workDir directory receiving the marked image
 * @param {string} screenshotPath source screenshot
 * @param {number} centerX center x in pixels (rounded before use)
 * @param {number} centerY center y in pixels (rounded before use)
 * @returns {{ ok: boolean, outPath?: string, error?: string }}
 */
function drawCenterMarkOnScreenshot (workDir, screenshotPath, centerX, centerY) {
  if (!fs.existsSync(markCenterScriptPath)) {
    return { ok: false, error: `未找到 ${markCenterScriptPath}` }
  }
  const outPath = path.join(workDir, 'screenshot_center_marked.png')
  const pythonPath = resolvePythonExecutable()
  const args = [
    markCenterScriptPath,
    '--input', screenshotPath,
    '--output', outPath,
    '--x', String(Math.round(centerX)),
    '--y', String(Math.round(centerY)),
  ]
  const rEnv = parseInt(String(process.env.IMG_CENTER_MARK_RADIUS || '').trim(), 10)
  if (Number.isFinite(rEnv) && rEnv > 0) {
    args.push('--radius', String(rEnv))
  }
  const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
  if (process.platform === 'win32') {
    // Put the interpreter's directory and the directory two levels above it
    // in front of PATH for the child process.
    const pyDir = path.dirname(pythonPath)
    const pyRoot = path.dirname(path.dirname(pyDir))
    env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
  }
  const r = spawnSync(pythonPath, args, {
    encoding: 'utf-8',
    timeout: IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS,
    env,
    cwd: projectRoot,
    windowsHide: true,
  })
  if (r.status !== 0) {
    const output = [r.stderr, r.stdout].filter(Boolean).join('\n').trim()
    // r.error is set when the process could not be spawned or was killed on
    // timeout; in those cases there is usually no script output to report.
    const spawnDetail = r.error ? (r.error.message || String(r.error)) : ''
    const fallback = spawnDetail ? `绘制中心点标记失败: ${spawnDetail}` : '绘制中心点标记失败'
    return { ok: false, error: output || fallback }
  }
  try {
    // The script reports its outcome as JSON on the last non-empty stdout line.
    const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || ''
    const j = JSON.parse(tail)
    if (!j.success) return { ok: false, error: j.error || '绘制中心点标记失败' }
  } catch (_) {
    // No JSON status: accept the run only if a plausible output file exists.
    if (!fs.existsSync(outPath) || fs.statSync(outPath).size < 32) {
      return { ok: false, error: '标记脚本输出非 JSON 或未写出有效 PNG' }
    }
  }
  return { ok: true, outPath }
}
/**
 * Best-effort write of a diagnostic artifact into the work directory.
 * Failures are swallowed on purpose: artifacts are debugging aids and must
 * not decide whether the lookup succeeds.
 *
 * @returns {boolean} true when the file was written
 */
function writePipelineArtifact (workDir, fileName, content) {
  try {
    fs.writeFileSync(path.join(workDir, fileName), content, 'utf8')
    return true
  } catch (_) {
    return false
  }
}

/**
 * Locates the template on the device screen and returns its center point.
 *
 * Flow: create `<tmp>/img-center-<timestamp>` → ADB screenshot + copy of the
 * template → ask each model of the chain in turn until one returns a usable
 * normalized center → convert to pixels (clamped into the screenshot) →
 * draw the marker image (optional; a marker failure does not fail the
 * lookup) → write the result artifacts.
 *
 * A model attempt that throws is recorded as a failed attempt and the next
 * model is tried. All diagnostic files are written best-effort.
 *
 * @param {string} device adb device serial
 * @param {string} templateAbsPath absolute path of the template image
 * @param {string} [folderPath] workflow folder that owns the tmp directory
 * @returns {Promise<object>} on success `{ success: true, coordinate,
 *   clickPosition, workDir, markedScreenshotPath }`, otherwise
 *   `{ success: false, error, workDir? }`
 */
async function runPipeline (device, templateAbsPath, folderPath) {
  if (!device) return { success: false, error: '缺少设备 ID' }
  if (!templateAbsPath || !fs.existsSync(templateAbsPath)) {
    return { success: false, error: `模板不存在: ${templateAbsPath}` }
  }

  const tmpRoot = resolveWorkflowTmpRoot(folderPath)
  const workDir = path.join(tmpRoot, `img-center-${Date.now()}`)
  fs.mkdirSync(workDir, { recursive: true })
  const screenshotPath = path.join(workDir, 'screenshot.png')
  const templateInWork = path.join(workDir, 'template.png')

  const adbPath = getAdbPath()
  if (!adbScreencapPng(adbPath, device, screenshotPath)) {
    return { success: false, error: 'ADB 截图失败', workDir }
  }
  fs.copyFileSync(templateAbsPath, templateInWork)

  const dims = readPngIhdrDimensions(screenshotPath)
  if (!dims) {
    return { success: false, error: '无法读取截图 PNG 尺寸(IHDR)', workDir }
  }

  const modelChain = getCenterPointVlmModelChain()
  if (modelChain.length === 0) {
    const err = '未配置可用 VLM 模型'
    writePipelineArtifact(workDir, 'openai_error.txt', err)
    return { success: false, error: err, workDir }
  }

  const attemptLog = []
  let lastError = ''
  /** @type {{ center_rx: number, center_ry: number } | null} */
  let norm = null
  let aiData = null
  let successRaw = null
  let successModel = null
  for (const [i, m] of modelChain.entries()) {
    let ai
    try {
      ai = await callOpenAiCenterPointWithModel(workDir, screenshotPath, templateInWork, m, i)
    } catch (e) {
      // One failing attempt must not abort the whole fallback chain.
      ai = { ok: false, error: e && e.message ? e.message : String(e), model: m }
    }
    const normTry = ai.ok ? parseCenterNormalizedFromVlm(ai.data, dims.width, dims.height) : null
    attemptLog.push({
      index: i,
      model: m,
      requestOk: ai.ok,
      error: ai.ok ? null : ai.error,
      hasValidCenter: !!normTry,
    })
    if (!ai.ok) {
      lastError = ai.error || 'VLM 中心点失败'
      continue
    }
    if (normTry) {
      norm = normTry
      aiData = ai.data
      successRaw = ai.rawResp
      successModel = m
      break
    }
    lastError = '模型未返回有效中心点(需 center_rx/center_ry 或兼容字段)'
  }

  writePipelineArtifact(
    workDir,
    'vlm_center_model_attempts.json',
    JSON.stringify(
      { model_chain: modelChain, success_model: successModel, attempts: attemptLog },
      null,
      2
    )
  )

  if (!norm || !aiData) {
    writePipelineArtifact(workDir, 'openai_error.txt', String(lastError || 'unknown'))
    writePipelineArtifact(workDir, 'center_parse_error.txt', String(lastError || 'unknown'))
    return { success: false, error: lastError || 'VLM 中心点失败', workDir }
  }

  if (successRaw) {
    try {
      writePipelineArtifact(workDir, 'openai_raw.json', JSON.stringify(successRaw, null, 2))
    } catch (_) {
      // JSON.stringify can throw (e.g. on cyclic data); the raw dump is optional.
    }
  }
  writePipelineArtifact(workDir, 'vlm_center_parsed.json', JSON.stringify(aiData, null, 2))

  // Normalized → pixel, clamped to the last valid pixel index on each axis.
  const px = Math.round(norm.center_rx * dims.width)
  const py = Math.round(norm.center_ry * dims.height)
  const ix = Math.max(0, Math.min(dims.width - 1, px))
  const iy = Math.max(0, Math.min(dims.height - 1, py))

  let markedScreenshotPath = null
  const mark = drawCenterMarkOnScreenshot(workDir, screenshotPath, ix, iy)
  if (mark.ok) {
    markedScreenshotPath = mark.outPath || null
  } else {
    writePipelineArtifact(workDir, 'screenshot_center_mark_error.txt', String(mark.error || 'unknown'))
  }

  writePipelineArtifact(
    workDir,
    'vlm_center_result.json',
    JSON.stringify(
      {
        center_rx: norm.center_rx,
        center_ry: norm.center_ry,
        pixel_x: ix,
        pixel_y: iy,
        screenshot_width: dims.width,
        screenshot_height: dims.height,
        marked_screenshot: markedScreenshotPath
          ? path.basename(markedScreenshotPath)
          : null,
        marked_screenshot_error: markedScreenshotPath ? null : (mark.error || '未生成标记图'),
      },
      null,
      2
    )
  )

  return {
    success: true,
    coordinate: { x: ix, y: iy, width: 1, height: 1 },
    clickPosition: { x: ix, y: iy },
    workDir,
    markedScreenshotPath,
  }
}
- /*
- * ---------- 旧 runPipeline(ROI + 预处理 + NCC,保留勿删)----------
- async function runPipeline_OLD_NCC (device, templateAbsPath, folderPath) {
- if (!device) return { success: false, error: '缺少设备 ID' }
- if (!templateAbsPath || !fs.existsSync(templateAbsPath)) {
- return { success: false, error: `模板不存在: ${templateAbsPath}` }
- }
- if (!fs.existsSync(orbScriptPath)) {
- return { success: false, error: `未找到 ${orbScriptPath}` }
- }
- if (!fs.existsSync(preprocessTemplateScriptPath)) {
- return { success: false, error: `未找到 ${preprocessTemplateScriptPath}` }
- }
- const tmpRoot = resolveWorkflowTmpRoot(folderPath)
- fs.mkdirSync(tmpRoot, { recursive: true })
- const workDir = path.join(tmpRoot, `img-center-${Date.now()}`)
- fs.mkdirSync(workDir, { recursive: true })
- const screenshotPath = path.join(workDir, 'screenshot.png')
- const templateInWork = path.join(workDir, 'template.png')
- const matchedPath = path.join(workDir, 'Matched.png')
- const adbPath = getAdbPath()
- if (!adbScreencapPng(adbPath, device, screenshotPath)) {
- return { success: false, error: 'ADB 截图失败' }
- }
- fs.copyFileSync(templateAbsPath, templateInWork)
- const ai = await callOpenAiRoi(workDir, screenshotPath, templateInWork)
- if (!ai.ok) {
- fs.writeFileSync(path.join(workDir, 'openai_error.txt'), String(ai.error || 'unknown'), 'utf8')
- return { success: false, error: ai.error || 'VLM ROI 失败', workDir }
- }
- let payload = normalizeVlmPayload(ai.data)
- if (!isFullScreenRoiHint(payload.roi_hint)) {
- payload = {
- ...payload,
- roi_hint: expandRoiHintNormalized(payload.roi_hint),
- }
- }
- fs.writeFileSync(path.join(workDir, 'vlm_roi.json'), JSON.stringify(payload, null, 2), 'utf8')
- const pythonPath = getPythonPath()
- const prep = runTemplatePreprocess(pythonPath, templateAbsPath, workDir)
- if (!prep.ok) {
- fs.copyFileSync(templateAbsPath, templateInWork)
- try {
- const vrPath = path.join(workDir, 'vlm_roi.json')
- const vr = JSON.parse(fs.readFileSync(vrPath, 'utf8'))
- vr.template_vlm_preprocessed = false
- delete vr.template_preprocess_paths
- fs.writeFileSync(vrPath, JSON.stringify(vr, null, 2), 'utf8')
- } catch (_) {}
- fs.writeFileSync(path.join(workDir, 'template_preprocess_error.txt'), String(prep.error || ''), 'utf8')
- }
- const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
- if (!String(env.IMG_CENTER_NCC_MIN_SCORE || '').trim()) {
- env.IMG_CENTER_NCC_MIN_SCORE = String(IMG_CENTER_NCC_MIN_SCORE_DEFAULT)
- }
- if (process.platform === 'win32') {
- const pyDir = path.dirname(pythonPath)
- const pyRoot = path.dirname(path.dirname(pyDir))
- env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
- }
- const r = spawnSync(
- pythonPath,
- [orbScriptPath, '--work-dir', workDir],
- { encoding: 'utf-8', timeout: PYTHON_ORB_SCRIPT_TIMEOUT_MS, env, cwd: projectRoot }
- )
- if (r.status !== 0) {
- const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || 'NCC 匹配脚本失败'
- return { success: false, error: msg, workDir }
- }
- let out
- try {
- out = JSON.parse(r.stdout.trim())
- } catch (e) {
- return { success: false, error: `脚本输出非 JSON: ${(r.stdout || '').slice(0, 300)}`, workDir }
- }
- if (!out.success) return { success: false, error: out.error || '匹配失败', workDir }
- if (!waitUntilMatchedWritten(matchedPath, MATCHED_PNG_MAX_WAIT_MS)) {
- return { success: false, error: `Matched.png 未就绪: ${matchedPath}`, workDir }
- }
- return {
- success: true,
- coordinate: { x: out.x, y: out.y, width: out.width, height: out.height },
- clickPosition: { x: out.center_x, y: out.center_y },
- workDir,
- }
- }
- * ---------- 旧 runPipeline 结束 ----------
- */
/**
 * press/locate entry point, awaited on the Electron side: locates a template
 * image through `runPipeline` and returns a trimmed result object.
 *
 * A relative `imagePath` is resolved against `projectRoot`; an absolute path
 * is used as given.
 *
 * @param {string} device Device ID, forwarded unchanged to `runPipeline`.
 * @param {string} imagePath Template image path (absolute, or relative to the project root).
 * @param {string} [folderPath] Current workflow directory (e.g. .../static/process/GenerateNote);
 *   temporary files are written under its tmp/ subdirectory.
 * @returns {Promise<{success: boolean, coordinate?: object, clickPosition?: object, markedScreenshotPath?: (string|null), error?: string}>}
 *   On failure only `success` and `error` are present.
 */
async function matchImageAndGetCoordinate (device, imagePath, folderPath) {
  // path.isAbsolute() throws a TypeError for non-strings, and an empty string
  // would resolve to the project root directory itself. Report both through
  // the result object, like every other failure of this function.
  if (typeof imagePath !== 'string' || imagePath === '') {
    return { success: false, error: '缺少模板图片路径' }
  }
  const templatePath = path.isAbsolute(imagePath)
    ? imagePath
    : path.resolve(projectRoot, imagePath)
  const r = await runPipeline(device, templatePath, folderPath)
  if (!r.success) return { success: false, error: r.error }
  return {
    success: true,
    coordinate: r.coordinate,
    clickPosition: r.clickPosition,
    markedScreenshotPath: r.markedScreenshotPath || null,
  }
}
/**
 * Validates the inputs, resolves the template image path, runs the locate
 * pipeline and reports the center point of the match.
 *
 * Template path resolution (the base directory is `folderPath` when it is a
 * non-empty string, otherwise `projectRoot`):
 *   - starts with '/' or contains ':'   -> used as given
 *   - contains '/' or `path.sep`        -> joined onto the base directory
 *   - anything else (bare file name)    -> `<base>/resources/<template>`
 *
 * @param {object} options
 * @param {string} options.device Device ID; required.
 * @param {string} options.template Template image path or file name; required.
 * @param {string} [options.folderPath] Workflow directory, also forwarded to `runPipeline`.
 * @returns {Promise<object>} `{ success: false, error }` on failure, otherwise
 *   `{ success: true, center, coordinate, workDir, markedScreenshotPath }`.
 */
async function executeImgCenterPointLocation ({ device, template, folderPath }) {
  if (!device) {
    return { success: false, error: '缺少设备 ID,无法自动获取截图' }
  }
  if (!template || typeof template !== 'string') {
    return { success: false, error: '缺少模板图片路径(inVars[0])' }
  }

  const rootDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot
  const looksAbsolute = template.startsWith('/') || template.includes(':')
  const containsSeparator = template.includes('/') || template.includes(path.sep)

  let templatePath
  if (looksAbsolute) {
    templatePath = template
  } else if (containsSeparator) {
    templatePath = path.join(rootDir, template)
  } else {
    // A bare file name is looked up in the base directory's resources/ folder
    templatePath = path.join(rootDir, 'resources', template)
  }

  const outcome = await runPipeline(device, templatePath, folderPath)
  if (!outcome.success) {
    return { success: false, error: outcome.error }
  }

  const { coordinate, clickPosition, workDir, markedScreenshotPath } = outcome
  let center = clickPosition
  if (!center) {
    // No explicit click position: use the geometric center of the matched box
    center = {
      x: coordinate.x + coordinate.width / 2,
      y: coordinate.y + coordinate.height / 2,
    }
  }

  return {
    success: true,
    center,
    coordinate,
    workDir,
    markedScreenshotPath: markedScreenshotPath || null,
  }
}
- module.exports = {
- tagName,
- schema,
- executeImgCenterPointLocation,
- matchImageAndGetCoordinate,
- /** @deprecated 与 matchImageAndGetCoordinate 相同 */
- matchImageAndGetCoordinateAsync: matchImageAndGetCoordinate,
- /** 解析后的模型名 + 当前 ai 包 baseUrl(测试 / 调试) */
- getImgCenterModel,
- getImgCenterAiMeta,
- /** 中心点 VLM 实际调用链(主 + 备用,≤3) */
- getCenterPointVlmModelChain,
- }
|