|
|
@@ -0,0 +1,1035 @@
|
|
|
+/**
|
|
|
+ * fun 标签:img-center-point-location
|
|
|
+ * 仅需 inVars[0]:模板路径。
|
|
|
+ *
|
|
|
+ * 【当前逻辑】tmp → ADB 截图 + 模板复制 → VLM img2text(主模型 + 最多 2 个备用模型,**共≤3 次**;
|
|
|
+ * 仅在「请求失败」或「无有效中心点坐标」时换模型重试)→ 归一化转像素 →
|
|
|
+ * 可选:Python 在截图上画**绿色圆圈**标出中心点,保存为 `screenshot_center_marked.png`(同一次 `img-center-时间戳` 目录)。
|
|
|
+ *
|
|
|
+ * 【已注释保留】原流程:VLM(ROI + template_crop / need_center_crop / template_scale)→ 外扩 roi →
|
|
|
+ * Python 预处理 → NCC(img-center-orb-akaze.py)。恢复时取消下方对应块注释并改回 runPipeline 即可。
|
|
|
+ *
|
|
|
+ * 密钥与网关:优先根目录 config.js(openaiApiKey 等同步到环境变量),否则 nodejs/ai/config.js;模型常量见文件顶部。
|
|
|
+ */
|
|
|
+
|
|
|
+// ---------------------------------------------------------------------------
|
|
|
+// 配置:模型与超时(优先改这里或对应环境变量 / 根目录 config.js)
|
|
|
+// ---------------------------------------------------------------------------
|
|
|
+
|
|
|
+/**
|
|
|
+ * 主多模态模型 id(img2text 模型链的首个模型;请求失败或无有效坐标时改用下方备用模型,合计最多 3 次请求):在此直接写字符串;写 '' 则依次用
|
|
|
+ * process.env.IMG_CENTER_OPENAI_MODEL、config.imgCenterOpenAiModel、nodejs/ai 默认链。
|
|
|
+ */
|
|
|
+// const IMG_CENTER_OPENAI_MODEL = 'gemini-3.1-pro-preview'
|
|
|
+const IMG_CENTER_OPENAI_MODEL = 'gpt-5.4'
|
|
|
+
|
|
|
+/**
|
|
|
+ * 中心点 VLM:主模型请求失败或 JSON 无法解析出有效 center_rx/center_ry 时,依次用备用 1、备用 2 再请求。
|
|
|
+ * 链为 [主模型, 备用1, 备用2] 去重后取前 3 个,**最多 3 次** img2text。
|
|
|
+ * 写 '' 表示该档跳过;环境变量 IMG_CENTER_FALLBACK_MODEL_1 / IMG_CENTER_FALLBACK_MODEL_2 优先于常量。
|
|
|
+ *
|
|
|
+ * Claude 4.6 选型(多模态「对齐两点坐标」类任务,公开对比多认为 Opus 图像分析略强于 Sonnet,但本任务输出极短 JSON):
|
|
|
+ * - 首选备用:**claude-sonnet-4-6**(延迟/成本更友好,视觉足够)。
|
|
|
+ * - 第二轮:**claude-opus-4-6**(能力上限略高,仍不稳再上)。
|
|
|
+ * - *-thinking:推理链更长、更慢更贵;非复杂推理时一般**不必**作默认备用。
|
|
|
+ * OpenAI 网关可改回如 gpt-4o / gpt-4.1。注意:下方常量当前实际为 备用1=claude-opus-4-6、备用2=gemini-3.1-pro-preview,与上面的 Sonnet→Opus 选型说明不一致,以常量为准。
|
|
|
+ */
|
|
|
+const IMG_CENTER_FALLBACK_MODEL_1 = 'claude-opus-4-6'
|
|
|
+const IMG_CENTER_FALLBACK_MODEL_2 = 'gemini-3.1-pro-preview'
|
|
|
+
|
|
|
+/** 中心点 img2text 最多调用次数(主 + 备用,去重后截断) */
|
|
|
+const IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS = 3
|
|
|
+
|
|
|
/**
 * Request timeout, in milliseconds, for the vision (img2text) API call.
 * Read once at module load from the IMG_CENTER_AI_TIMEOUT_MS environment
 * variable; defaults to 300 000 ms and is never lower than 10 000 ms.
 */
const IMG_CENTER_AI_TIMEOUT_MS = (() => {
  const floorMs = 10_000
  const defaultMs = 300_000
  const fromEnv = parseInt(String(process.env.IMG_CENTER_AI_TIMEOUT_MS || '').trim(), 10)
  // NaN (unset / unparseable) and 0 are both falsy and select the default.
  const requestedMs = fromEnv || defaultMs
  return requestedMs > floorMs ? requestedMs : floorMs
})()
|
|
|
+
|
|
|
/**
 * Spawn timeout, in milliseconds, for the Python script that draws the green
 * center marker on the screenshot. Overridable through the
 * IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS environment variable; defaults to
 * 30 000 ms with a 5 000 ms floor.
 */
const IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS = (() => {
  const rawValue = String(process.env.IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS || '').trim()
  const parsedMs = parseInt(rawValue, 10)
  // A falsy parse result (NaN or 0) selects the 30 s default before the floor is applied.
  const candidates = [5000, parsedMs || 30_000]
  return Math.max(...candidates)
})()
|
|
|
+
|
|
|
+/** 【旧 NCC 流程】runPipeline 内等待 Matched.png 就绪的上限(毫秒) */
|
|
|
+const MATCHED_PNG_MAX_WAIT_MS = 60_000
|
|
|
+
|
|
|
+/** 【旧 NCC 流程】Python NCC 匹配脚本 spawn 超时(毫秒) */
|
|
|
+const PYTHON_ORB_SCRIPT_TIMEOUT_MS = 120_000
|
|
|
+
|
|
|
+/**
|
|
|
+ * 【旧 NCC 流程】NCC 最低分:传给 Python(环境变量 IMG_CENTER_NCC_MIN_SCORE)。
|
|
|
+ */
|
|
|
+const IMG_CENTER_NCC_MIN_SCORE_DEFAULT = 0.34
|
|
|
+
|
|
|
/**
 * [Legacy preprocessing flow] Spawn timeout, in milliseconds, for the template
 * preprocessing script. Overridable through the
 * IMG_CENTER_PREPROCESS_TIMEOUT_MS environment variable; defaults to
 * 60 000 ms with a 5 000 ms floor.
 */
const PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS = (() => {
  const minimumMs = 5000
  const envText = String(process.env.IMG_CENTER_PREPROCESS_TIMEOUT_MS || '').trim()
  let timeoutMs = parseInt(envText, 10)
  // NaN (unset / unparseable) and 0 both fall back to the default.
  if (!timeoutMs) timeoutMs = 60_000
  return timeoutMs < minimumMs ? minimumMs : timeoutMs
})()
|
|
|
+
|
|
|
/**
 * [Legacy VLM ROI flow] Outward padding applied to the ROI, expressed as a
 * fraction of the screenshot size and clamped to [0, 0.12].
 * Overridable through the IMG_CENTER_ROI_PAD environment variable.
 *
 * The default (0.03) is used only when the variable is unset or unparseable.
 * An explicit "0" is honoured and disables padding; previously `|| 0.03`
 * silently replaced it with the default even though 0 is inside the clamp range.
 */
const IMG_CENTER_ROI_PAD = (() => {
  const parsed = parseFloat(String(process.env.IMG_CENTER_ROI_PAD || '').trim())
  const requested = Number.isNaN(parsed) ? 0.03 : parsed
  return Math.max(0, Math.min(0.12, requested))
})()
|
|
|
+
|
|
|
/**
 * [Legacy VLM ROI flow] Lower bound for the normalized ROI height.
 * Overridable through the IMG_CENTER_ROI_MIN_REL_H environment variable;
 * defaults to 0.15 and is clamped to [0.06, 0.45].
 */
const IMG_CENTER_ROI_MIN_REL_H = (() => {
  const envText = String(process.env.IMG_CENTER_ROI_MIN_REL_H || '').trim()
  // NaN (unset / unparseable) and 0 are falsy and select the default.
  const requested = parseFloat(envText) || 0.15
  if (requested < 0.06) return 0.06
  if (requested > 0.45) return 0.45
  return requested
})()
|
|
|
+
|
|
|
/**
 * [Legacy VLM ROI flow] Lower bound for the normalized ROI width.
 * Read from the IMG_CENTER_ROI_MIN_REL_W environment variable. A missing,
 * non-finite or non-positive value yields 0 (no minimum width enforced);
 * any other value is clamped to [0.06, 0.95].
 */
const IMG_CENTER_ROI_MIN_REL_W = (function resolveMinRelWidth () {
  const envText = String(process.env.IMG_CENTER_ROI_MIN_REL_W || '').trim()
  const requested = Number.parseFloat(envText)
  const usable = Number.isFinite(requested) && requested > 0
  if (!usable) return 0
  return Math.min(0.95, Math.max(0.06, requested))
})()
|
|
|
+
|
|
|
+// ---------------------------------------------------------------------------
|
|
|
+// 依赖与工程路径
|
|
|
+// ---------------------------------------------------------------------------
|
|
|
+
|
|
|
+const path = require('path')
|
|
|
+const fs = require('fs')
|
|
|
+const { spawnSync } = require('child_process')
|
|
|
+const { getPythonExeFromConfig } = require('../../../../python-exe-from-config.js')
|
|
|
+
|
|
|
// Application-level config.js: located in the parent directory of STATIC_ROOT
// when that environment variable is set, otherwise five directories above this file.
const configPath = path.join(
  process.env.STATIC_ROOT
    ? path.dirname(path.resolve(process.env.STATIC_ROOT))
    : path.join(__dirname, '..', '..', '..', '..', '..'),
  'config.js'
)

// Loaded config object; an empty object when config.js does not exist.
const config = (() => {
  if (!fs.existsSync(configPath)) return {}
  return require(configPath)
})()

// Project root: config.projectRoot when it is set and exists on disk,
// otherwise the directory that contains (or would contain) config.js.
const projectRoot = (() => {
  const configuredRoot = config.projectRoot
  if (configuredRoot && fs.existsSync(configuredRoot)) return configuredRoot
  return path.dirname(path.resolve(configPath))
})()
|
|
|
+
|
|
|
/**
 * Copies the API key and gateway URL from the application config into
 * process.env so that the nodejs/ai package config can pick them up.
 * Must run before nodejs/ai is loaded.
 *
 * - API_KEY is set from config.openaiApiKey (or config.vlmApiKey) only when
 *   API_KEY, OPENAI_API_KEY and VLM_API_KEY are all unset or blank.
 * - BASE_URL is set from config.openaiApiUrl (trimmed, one trailing slash
 *   removed) only when BASE_URL and OPENAI_API_URL are both unset or blank.
 *
 * Existing environment values are never overwritten.
 */
function syncProcessEnvFromAppConfig () {
  const envIsBlank = (name) => String(process.env[name] || '').trim() === ''

  const appKey = config.openaiApiKey || config.vlmApiKey
  const keyEnvNames = ['API_KEY', 'OPENAI_API_KEY', 'VLM_API_KEY']
  if (appKey && keyEnvNames.every((name) => envIsBlank(name))) {
    process.env.API_KEY = String(appKey).trim()
  }

  const appUrl = config.openaiApiUrl
  const urlEnvNames = ['BASE_URL', 'OPENAI_API_URL']
  if (appUrl && urlEnvNames.every((name) => envIsBlank(name))) {
    process.env.BASE_URL = String(appUrl).trim().replace(/\/$/, '')
  }
}
|
|
|
+
|
|
|
+syncProcessEnvFromAppConfig()
|
|
|
+
|
|
|
+const aiRoot = path.join(__dirname, '..', '..', '..', '..', 'ai')
|
|
|
+const aiModule = require(path.join(aiRoot, 'ai.js'))
|
|
|
+const img2textRequest = require(path.join(aiRoot, 'request', 'img2text.js'))
|
|
|
+const aiPackageConfig = require(path.join(aiRoot, 'config.js'))
|
|
|
+
|
|
|
+/** 【旧 NCC / 预处理】脚本路径(当前 runPipeline 不调用;恢复旧流程时用) */
|
|
|
+const orbScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-orb-akaze.py')
|
|
|
+const preprocessTemplateScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-preprocess-template.py')
|
|
|
+/** 在截图上标出 VLM 中心点(绿圈)的可视化脚本 */
|
|
|
+const markCenterScriptPath = path.join(projectRoot, 'python', 'scripts', 'img-center-mark-center.py')
|
|
|
+
|
|
|
+/**
|
|
|
+ * 【当前】仅问中心点:图1=截图,图2=模板;返回 center_rx、center_ry ∈ [0,1](相对图1 宽高)。
|
|
|
+ */
|
|
|
+const VLM_CENTER_ONLY_PROMPT = `你收到两张图,顺序固定:
|
|
|
+图1:Android 手机完整截图(与 adb screencap 一致),逻辑像素宽约 W、高约 H;坐标原点在左上角,x 向右、y 向下。
|
|
|
+图2:模板图。请在图1 中找到与图2 视觉上对应的同一区域(同一图标、缩略图格子、按钮等)。
|
|
|
+
|
|
|
+任务:给出图2 在图1 中**匹配可见区域的几何中心点**(该区域中心,不是图2 整张文件的画布中心)。
|
|
|
+
|
|
|
+只输出一个 JSON 对象,必须包含:
|
|
|
+- "center_rx"、"center_ry":数字,取值在 [0,1],分别为该中心点在图1 上相对宽度 W、高度 H 的归一化坐标(左缘=0,上缘=0)。
|
|
|
+若图1 中完全无法对应图2,两个键均填 null。
|
|
|
+
|
|
|
+禁止 markdown、禁止代码围栏、禁止 JSON 以外的任何文字。`
|
|
|
+
|
|
|
+/*
|
|
|
+ * ---------- 旧 ROI + 模板几何 VLM 提示(NCC 流程用,保留勿删)----------
|
|
|
+const VLM_USER_PROMPT = `你收到两张图,顺序固定:
|
|
|
+图1:Android 手机竖屏完整截图,逻辑像素宽 W、高 H。
|
|
|
+图2:模板图——要在图1画面里定位的同一内容。常见情况:图2 是 PC/自动化侧通过 ADB 推到手机上的**原始文件**(与磁盘上打开的同一像素内容),**不是**相册 App 已处理后的内部版本。图1 里若出现该图,往往是**系统相册/图库**里的**网格缩略图**:会先对原图做**缩小**,且方格多为**近似正方形**,竖图/横图常被**居中裁成方图**再显示,与图2 原始长宽比可能完全不同。
|
|
|
+
|
|
|
+任务:在图1中找到与图2视觉上对应的那一块区域(同一控件、同一相册格里的缩略图等),给出「搜索矩形」roi_hint;并在适用时给出 template_crop、template_scale、need_center_crop,使下游能**复现相册里那一格的「裁切 + 缩放」**,用于模板匹配。
|
|
|
+
|
|
|
+硬性规则(roi_hint):
|
|
|
+- roi_hint 的四个数必须是相对图1 的归一化坐标:rx0,ry0 为矩形左上角,rx1,ry1 为右下角,均在 [0,1],且 rx0<rx1、ry0<ry1。
|
|
|
+- 【只框图2】图2 里**只有**某一图标/按钮时,roi 应主要覆盖图1里**与之对应的那一块**,不要为了「多装点内容」而把**竖直方向**上、与图2无关的相邻 Tab 图标(例如在图1里与目标**横向并排**的其它底栏图标)一起框进竖长条;底栏场景下各图标是**横排**的,roi 应是**偏横向的条带**包住目标及窄边距,而不是上下堆叠多个无关图标。
|
|
|
+- 【必须完整】图2 模板在图1里所对应的那一整块 UI(含圆角、描边、阴影等可见像素)必须**全部**落在 roi 矩形内部,**任何一边都不得裁切**到模板上的图形;若宁可 roi 明显大一点也要保证完整。
|
|
|
+- 【自检】若你意识到按当前四个数裁图会「切掉」图2 上任意可见部分(例如只框到红色按钮的下半截、圆角被切、+ 号缺一截),**必须**把 ry0 上移或 ry1 下移、或放宽 rx,直到不会裁切。
|
|
|
+- 【底部栏】目标在屏底导航栏时:roi 的纵向高度 (ry1−ry0) 建议至少为屏高的 **14%~22%**,且 ry0 要足够靠上,使整块圆形/圆角按钮(含完整外轮廓)都在框内;**禁止**高度小于屏高 **12%** 的扁条。横向 (rx1−rx0) 以刚好包住目标按钮宽度 + 左右各约 5%~15% 屏宽为宜。
|
|
|
+- 【可大一些】在已完整包含上述目标的前提下,roi 宁可略大勿小:各方向外扩边距避免贴边裁切;图2 近似正方形时,roi **勿**做成「竖远长于横」的窄竖条(除非图2 本身就是竖长条)。
|
|
|
+- 若图1中存在多处相似元素,选择与图2内容最一致、最可能是用户意图的一处;若完全无法对应,四个坐标全填 null。
|
|
|
+
|
|
|
+template_scale(模拟相册把图缩小进格子的比例):
|
|
|
+- 数字,范围建议 0.05~1.0,表示在**已按下方顺序处理完 template_crop 与 need_center_crop 之后**,对图2 再做**等比线性缩放**(宽、高同比例);1 表示不在此步缩小。
|
|
|
+- 估法:对比**图1 里目标格中的缩略图**与**图2 原始文件**——若屏上格子里的内容明显是「整图缩小后的局部/整体」,应给 **小于 1**(相册格常见约 **0.2~0.5**,视分辨率与格大小而定);若图1 里几乎 1:1 对应原图2 像素内容则接近 1。
|
|
|
+- **不要**把 template_scale 理解成随意数字;应绑定:**对齐「ADB 原图 → 相册网格里显示尺寸」的缩放**。
|
|
|
+
|
|
|
+template_crop(相对**原始图2** 宽高的归一化矩形 cx0,cy0,cx1,cy1 ∈ [0,1],cx0<cx1、cy0<cy1):
|
|
|
+- **相册/多宫格场景**:表示「图1 里那一格缩略图**所对应的原图2 上的可见区域**」——即:若把原图2 按相册逻辑裁切后才会得到与格内一致的画面,应用此矩形框出原图2 上的该区域。竖长图在方格里通常只显示**中间一条/一块**,此时 crop 应是**接近正方形或略竖/略横的矩形**,**不要**默认填 0,0,1,1 除非图1 明确显示的是「整图缩进格内、无裁切」。
|
|
|
+- **非相册场景**(图标、按钮、整段 UI):可表示去掉白边、只保留主体;无需裁切则 0,0,1,1。
|
|
|
+- 若你能较准给出「格内所见 ↔ 原图2」的对应关系,应优先给出**非全图**的 template_crop;与 need_center_crop 配合见下。
|
|
|
+
|
|
|
+need_center_crop:
|
|
|
+- 当图1 明显是「相册/多宫格选图」、缩略图为**方格**且图2 与格内显示的长宽比不一致(典型:竖图进方格)时为 **true**;否则 **false**。
|
|
|
+- 为 true 时:roi_hint 仍只框**与图2 对应的那一格**(含格线外极少边距)。下游会在 template_crop 结果上再作**居中取最大正方形**,以逼近系统相册方格裁切;因此若你已在 template_crop 里给出了**精确的方格可见区域**(本身已接近正方形),可将 need_center_crop 设为 **false**,避免几何重复。
|
|
|
+- 若相册场景下**无法**可靠估计 template_crop,可 **template_crop 填 0,0,1,1** 且 **need_center_crop 为 true**,由程序用「整图居中裁方」兜底。
|
|
|
+
|
|
|
+confidence:0~1,可随意填;下游**不使用**该字段做拒识,仅作记录。
|
|
|
+
|
|
|
+只输出一个 JSON 对象,顶层键必须包含:roi_hint、need_center_crop、confidence、template_scale、template_crop。
|
|
|
+roi_hint 为对象,键 "rx0","ry0","rx1","ry1"(数字或 null)。
|
|
|
+template_crop 为对象,键 "cx0","cy0","cx1","cy1"(数字)。
|
|
|
+禁止 markdown、禁止代码围栏、禁止 JSON 以外的任何文字。`
|
|
|
+ * ---------- 旧 VLM_USER_PROMPT 结束 ----------
|
|
|
+ */
|
|
|
+
|
|
|
+const tagName = 'img-center-point-location'
|
|
|
+
|
|
|
+const schema = {
|
|
|
+ description:
|
|
|
+ '在屏幕截图中查找模板并返回中心点;主模型无效坐标时自动换备用模型(最多 3 次)。成功时保存 screenshot_center_marked.png。原 ROI+预处理+NCC 已注释保留。',
|
|
|
+ inputs: { template: '模板图片路径(inVars[0])', variable: '输出变量名(outVars)' },
|
|
|
+ outputs: { variable: '中心点 {x,y}(对象)' },
|
|
|
+}
|
|
|
+
|
|
|
+// ---------------------------------------------------------------------------
|
|
|
+// 以下为 runPipeline 主流程中的调用顺序(自上而下与执行顺序一致)
|
|
|
+// ---------------------------------------------------------------------------
|
|
|
+
|
|
|
/**
 * Resolves the `tmp` directory for a workflow, e.g. static/process/GenerateNote/tmp.
 *
 * @param {string} [folderPath] Workflow folder; a relative path is resolved
 *   against projectRoot. Anything that is not a non-empty string is ignored.
 * @returns {string} `<folder>/tmp`, or `<projectRoot>/tmp` when no usable folder is given.
 */
function resolveWorkflowTmpRoot (folderPath) {
  const hasFolder = Boolean(folderPath) && typeof folderPath === 'string'
  if (!hasFolder) return path.join(projectRoot, 'tmp')

  const workflowDir = path.isAbsolute(folderPath)
    ? folderPath
    : path.join(projectRoot, folderPath)
  return path.join(workflowDir, 'tmp')
}
|
|
|
+
|
|
|
/**
 * Returns the path of the adb executable.
 *
 * Uses config.adbPath.path when configured (a relative value is resolved
 * against projectRoot); otherwise falls back to the bundled binary under
 * `<projectRoot>/lib/scrcpy-adb` (`adb.exe` on Windows, `adb` elsewhere).
 *
 * @returns {string}
 */
function getAdbPath () {
  const configuredPath = config.adbPath?.path
  if (configuredPath) {
    if (path.isAbsolute(configuredPath)) return configuredPath
    return path.resolve(projectRoot, configuredPath)
  }
  const binaryName = process.platform === 'win32' ? 'adb.exe' : 'adb'
  return path.join(projectRoot, 'lib', 'scrcpy-adb', binaryName)
}
|
|
|
+
|
|
|
/**
 * Captures the device screen with `adb -s <device> exec-out screencap -p`
 * and writes the PNG bytes to outFile (parent directories are created).
 *
 * The call is synchronous, so it is bounded by a 30 s timeout: a hung adb
 * (offline device, stuck daemon) would otherwise block the process forever.
 * Output is accepted only when adb exits with status 0, produced at least
 * 100 bytes, and the bytes start with the PNG signature, so error text or
 * truncated output is never written to disk as a screenshot.
 *
 * @param {string} adbPath Path of the adb executable.
 * @param {string} device Device serial passed to `-s`.
 * @param {string} outFile Destination PNG path.
 * @returns {boolean} true when a screenshot was written, false otherwise.
 */
function adbScreencapPng (adbPath, device, outFile) {
  const SCREENCAP_TIMEOUT_MS = 30_000
  const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])

  const r = spawnSync(adbPath, ['-s', device, 'exec-out', 'screencap', '-p'], {
    encoding: 'buffer',
    maxBuffer: 40 * 1024 * 1024,
    timeout: SCREENCAP_TIMEOUT_MS,
    windowsHide: true,
  })
  // r.error covers spawn failure, timeout and maxBuffer overflow.
  if (r.error || r.status !== 0) return false

  const png = r.stdout
  if (!png || png.length < 100) return false
  if (!png.subarray(0, PNG_SIGNATURE.length).equals(PNG_SIGNATURE)) return false

  fs.mkdirSync(path.dirname(outFile), { recursive: true })
  fs.writeFileSync(outFile, png)
  return true
}
|
|
|
+
|
|
|
/**
 * Reads an image file and returns it as a base64 `data:` URL.
 *
 * The media type is taken from the file's leading magic bytes (JPEG, GIF,
 * WebP) and defaults to `image/png` for anything else, PNG included. The
 * template is copied to `template.png` without conversion, so a JPEG
 * template would otherwise be sent with a mismatched `image/png` label.
 *
 * @param {string} absPath Path of the image file.
 * @returns {string} `data:<mime>;base64,<payload>`
 * @throws {Error} Propagates any fs.readFileSync error (e.g. missing file).
 */
function fileToDataUrlPng (absPath) {
  const bytes = fs.readFileSync(absPath)
  const hasBytesAt = (signature, offset = 0) =>
    bytes.length >= offset + signature.length &&
    signature.every((byte, i) => bytes[offset + i] === byte)

  let mime = 'image/png'
  if (hasBytesAt([0xff, 0xd8, 0xff])) {
    mime = 'image/jpeg'
  } else if (hasBytesAt([0x47, 0x49, 0x46, 0x38])) {
    mime = 'image/gif' // "GIF8"
  } else if (hasBytesAt([0x52, 0x49, 0x46, 0x46]) && hasBytesAt([0x57, 0x45, 0x42, 0x50], 8)) {
    mime = 'image/webp' // "RIFF" .... "WEBP"
  }
  return `data:${mime};base64,${bytes.toString('base64')}`
}
|
|
|
+
|
|
|
/**
 * Extracts a JSON object from free-form model output.
 *
 * Steps:
 *  1. If the text contains a Markdown code fence, only the fenced content is considered.
 *  2. The span from the first `{` to the last `}` is parsed as JSON.
 *  3. If that fails (e.g. the model appended prose that itself contains `}`),
 *     each `{` is tried in order and the first balanced `{...}` span that
 *     parses as JSON is returned. Braces inside JSON strings are ignored
 *     while balancing.
 *
 * @param {*} text Raw model output; non-strings are coerced with String().
 * @returns {object|null} The parsed object, or null when none can be extracted.
 */
function parseVlmJson (text) {
  let s = String(text || '').trim()
  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/i)
  if (fence) s = fence[1].trim()

  const parseObjectOrNull = (candidate) => {
    try {
      const value = JSON.parse(candidate)
      return value && typeof value === 'object' ? value : null
    } catch (_) {
      return null
    }
  }

  // Index of the `}` closing the object opened at `start`, or -1 when unbalanced.
  const findBalancedEnd = (src, start) => {
    let depth = 0
    let inString = false
    let escaped = false
    for (let i = start; i < src.length; i++) {
      const ch = src[i]
      if (inString) {
        if (escaped) escaped = false
        else if (ch === '\\') escaped = true
        else if (ch === '"') inString = false
        continue
      }
      if (ch === '"') {
        inString = true
      } else if (ch === '{') {
        depth++
      } else if (ch === '}') {
        depth--
        if (depth === 0) return i
      }
    }
    return -1
  }

  const outermost = s.match(/\{[\s\S]*\}/)
  if (!outermost) return null
  const direct = parseObjectOrNull(outermost[0])
  if (direct) return direct

  for (let start = s.indexOf('{'); start !== -1; start = s.indexOf('{', start + 1)) {
    const end = findBalancedEnd(s, start)
    if (end === -1) continue
    const value = parseObjectOrNull(s.slice(start, end + 1))
    if (value) return value
  }
  return null
}
|
|
|
+
|
|
|
/**
 * Reads the pixel dimensions from a PNG file's IHDR chunk
 * (`adb screencap -p` output is PNG).
 *
 * Only the first 24 bytes are read: the 8-byte signature, the 4-byte chunk
 * length, the 4-byte chunk type ("IHDR"), then width and height as
 * big-endian uint32. The header is rejected when fewer than 24 bytes could
 * be read, the signature does not match, or the first chunk is not IHDR.
 * The file descriptor is always closed, including when reading throws.
 *
 * @param {string} absPath Path of the PNG file.
 * @returns {{ width: number, height: number } | null} Dimensions, or null
 *   when the file is missing, unreadable, too short or not a PNG.
 */
function readPngIhdrDimensions (absPath) {
  const HEADER_BYTES = 24
  const PNG_SIGNATURE = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]
  let fd = null
  try {
    fd = fs.openSync(absPath, 'r')
    // Zero-filled buffer: a short read must never expose stale memory as dimensions.
    const header = Buffer.alloc(HEADER_BYTES)
    const bytesRead = fs.readSync(fd, header, 0, HEADER_BYTES, 0)
    if (bytesRead < HEADER_BYTES) return null
    if (!PNG_SIGNATURE.every((byte, i) => header[i] === byte)) return null
    if (header.toString('latin1', 12, 16) !== 'IHDR') return null

    const width = header.readUInt32BE(16)
    const height = header.readUInt32BE(20)
    if (width < 1 || height < 1) return null
    return { width, height }
  } catch (_) {
    return null
  } finally {
    if (fd !== null) {
      try {
        fs.closeSync(fd)
      } catch (_) {
        // Best effort: a failed close must not mask the result.
      }
    }
  }
}
|
|
|
+
|
|
|
/**
 * Coerces a value to a number and clamps it into [0, 1].
 *
 * @param {*} x Candidate value (number or numeric string).
 * @returns {number|null} The clamped number, or null when x is null/undefined
 *   or does not convert to a finite number.
 */
function clamp01 (x) {
  if (x == null) return null
  const value = Number(x)
  return Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : null
}
|
|
|
+
|
|
|
/**
 * Extracts a normalized center point from the parsed VLM JSON.
 *
 * Key pairs are tried in this order; the first pair whose two values are
 * both usable wins:
 *  1. center_rx / center_ry — normalized, clamped into [0, 1]
 *  2. cx / cy               — normalized, clamped into [0, 1]
 *  3. center_x / center_y   — when either value is greater than 1 and
 *     width/height are positive they are treated as pixels and divided by
 *     width/height; otherwise they are treated as normalized.
 *
 * An explicit null (or undefined) in center_x / center_y means "not found"
 * and yields null. Previously Number(null) === 0 turned
 * `{ center_x: null, center_y: null }` into a valid center at (0, 0).
 *
 * @param {object} parsed Parsed JSON object returned by the model.
 * @param {number} width Screenshot width in pixels.
 * @param {number} height Screenshot height in pixels.
 * @returns {{ center_rx: number, center_ry: number } | null}
 */
function parseCenterNormalizedFromVlm (parsed, width, height) {
  if (!parsed || typeof parsed !== 'object') return null

  const pairOrNull = (rx, ry) =>
    (rx != null && ry != null ? { center_rx: rx, center_ry: ry } : null)

  const normalized =
    pairOrNull(clamp01(parsed.center_rx), clamp01(parsed.center_ry)) ||
    pairOrNull(clamp01(parsed.cx), clamp01(parsed.cy))
  if (normalized) return normalized

  // Number(null) is 0, so nullish values must be rejected before coercion.
  if (parsed.center_x == null || parsed.center_y == null) return null
  const px = Number(parsed.center_x)
  const py = Number(parsed.center_y)
  if (!Number.isFinite(px) || !Number.isFinite(py)) return null

  const looksLikePixels = width > 0 && height > 0 && (px > 1 || py > 1)
  return pairOrNull(
    clamp01(looksLikePixels ? px / width : px),
    clamp01(looksLikePixels ? py / height : py)
  )
}
|
|
|
+
|
|
|
/**
 * Resolves the model name that is actually sent to the API.
 *
 * The first non-blank value wins, in this order: the IMG_CENTER_OPENAI_MODEL
 * constant, the IMG_CENTER_OPENAI_MODEL environment variable, then
 * config.imgCenterOpenAiModel. The result (or undefined when all are blank)
 * is handed to img2textRequest.resolveImgCenterModel, whose return value is
 * returned unchanged.
 */
function getImgCenterModel () {
  const fromConstant = String(IMG_CENTER_OPENAI_MODEL || '').trim()
  const fromEnv = String(process.env.IMG_CENTER_OPENAI_MODEL || '').trim()
  const fromConfig = String(config.imgCenterOpenAiModel || '').trim()

  const explicit = [fromConstant, fromEnv, fromConfig].find((id) => id !== '')
  return img2textRequest.resolveImgCenterModel(explicit || undefined)
}
|
|
|
+
|
|
|
/**
 * Returns the model for the first attempt, which must go through the
 * OpenAI-compatible gateway.
 *
 * When the resolved model is "doubao" (case-insensitive) it is replaced by
 * the nodejs/ai package's IMG_CENTER_MODEL, or by 'gpt-5.4' when that value
 * is blank or is itself "doubao". Any other resolved model is returned as-is.
 */
function getPrimaryOpenAiImgCenterModel () {
  const resolved = getImgCenterModel()
  const isDoubao = Boolean(resolved) && String(resolved).toLowerCase() === 'doubao'
  if (!isDoubao) return resolved

  const packageDefault = String(aiPackageConfig.IMG_CENTER_MODEL || '').trim()
  const packageDefaultUsable = packageDefault !== '' && packageDefault.toLowerCase() !== 'doubao'
  return packageDefaultUsable ? packageDefault : 'gpt-5.4'
}
|
|
|
+
|
|
|
/**
 * Resolves one fallback model id: the environment variable wins when it is
 * non-blank, otherwise the supplied constant is used.
 *
 * @param {string} envKey Name of the environment variable to consult.
 * @param {*} constVal Fallback value; null/undefined become ''.
 * @returns {string} Trimmed model id, or '' when neither source provides one.
 */
function resolveFallbackCenterModelId (envKey, constVal) {
  const fromEnv = String(process.env[envKey] || '').trim()
  return fromEnv !== '' ? fromEnv : String(constVal ?? '').trim()
}
|
|
|
+
|
|
|
/**
 * Builds the ordered list of models to try for the center-point img2text call:
 * [primary, fallback 1, fallback 2], with blank entries dropped and
 * duplicates removed (first occurrence kept), truncated to
 * IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS entries.
 *
 * @returns {string[]}
 */
function getCenterPointVlmModelChain () {
  const orderedIds = [
    String(getPrimaryOpenAiImgCenterModel() || '').trim(),
    resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1),
    resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2),
  ]
  const cleaned = orderedIds
    .map((id) => String(id || '').trim())
    .filter((id) => id !== '')
  // A Set keeps insertion order, so the first occurrence of each id wins.
  const uniqueIds = [...new Set(cleaned)]
  return uniqueIds.slice(0, IMG_CENTER_CENTER_POINT_MAX_VLM_CALLS)
}
|
|
|
+
|
|
|
/**
 * Collects the effective AI settings for this tag into one plain object:
 * the resolved model, the primary gateway model, the full model chain, both
 * fallback ids, the gateway base URL, and whether an API key is configured
 * (the key itself is not included).
 *
 * @returns {{ model: *, primaryOpenAiModel: *, centerPointVlmModelChain: string[],
 *   fallbackModel1: string, fallbackModel2: string, baseUrl: *, openAiKeyConfigured: boolean }}
 */
function getImgCenterAiMeta () {
  const model = getImgCenterModel()
  const primaryOpenAiModel = getPrimaryOpenAiImgCenterModel()
  const centerPointVlmModelChain = getCenterPointVlmModelChain()
  const fallbackModel1 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_1', IMG_CENTER_FALLBACK_MODEL_1)
  const fallbackModel2 = resolveFallbackCenterModelId('IMG_CENTER_FALLBACK_MODEL_2', IMG_CENTER_FALLBACK_MODEL_2)

  const apiKey = aiPackageConfig.API_KEY
  const openAiKeyConfigured = Boolean(apiKey) && String(apiKey).trim() !== ''

  return {
    model,
    primaryOpenAiModel,
    centerPointVlmModelChain,
    fallbackModel1,
    fallbackModel2,
    baseUrl: aiPackageConfig.BASE_URL,
    openAiKeyConfigured,
  }
}
|
|
|
+
|
|
|
/**
 * Performs one img2text request with the given model, asking for the center
 * point of the template (image 2) inside the screenshot (image 1).
 *
 * Every completed request is recorded in
 * `<workDir>/openai_raw_attempt_<attemptIndex>.json`; a thrown error is
 * recorded (best effort) in `<workDir>/openai_error_attempt_<attemptIndex>.txt`.
 *
 * This function never rejects: all failures, including a missing or
 * unreadable image file, are reported as `{ ok: false, error, model }`.
 * The image files are read only after the API key and model id have been
 * validated, so misconfiguration does not cost two file reads and encodes.
 *
 * @param {string} workDir Directory that receives the attempt log files.
 * @param {string} screenshotPath Screenshot image path (sent as image 1).
 * @param {string} templatePath Template image path (sent as image 2).
 * @param {string} modelName Model id for this attempt.
 * @param {number} attemptIndex Index used in the log file names.
 * @returns {Promise<{ ok: boolean, data?: object, error?: string, model: string, rawResp?: object }>}
 */
async function callOpenAiCenterPointWithModel (workDir, screenshotPath, templatePath, modelName, attemptIndex) {
  syncProcessEnvFromAppConfig()
  const openAiKey = String(aiPackageConfig.API_KEY || '').trim()
  const model = String(modelName || '').trim()
  try {
    if (!openAiKey) {
      return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', model }
    }
    if (!model) {
      return { ok: false, error: '模型 id 为空', model }
    }

    // Read inside the try block so an I/O error becomes an { ok: false } result.
    const imageUrls = [fileToDataUrlPng(screenshotPath), fileToDataUrlPng(templatePath)]

    const result = await aiModule.run('img2text', VLM_CENTER_ONLY_PROMPT, imageUrls, {
      timeoutMs: IMG_CENTER_AI_TIMEOUT_MS,
      model,
    })
    const resp = result.data
    const attemptPayload = {
      model,
      attemptIndex,
      httpSuccess: result.success,
      httpError: result.success ? null : (result.error || null),
      response: resp,
    }
    fs.writeFileSync(
      path.join(workDir, `openai_raw_attempt_${attemptIndex}.json`),
      JSON.stringify(attemptPayload, null, 2),
      'utf8'
    )
    if (!result.success) {
      return { ok: false, error: result.error || 'VLM 请求失败', model }
    }

    const content = resp?.choices?.[0]?.message?.content
    const parsed = parseVlmJson(content)
    if (!parsed || typeof parsed !== 'object') {
      return { ok: false, error: '无法解析模型返回为 JSON', model, rawResp: resp }
    }
    return { ok: true, data: parsed, model, rawResp: resp }
  } catch (e) {
    const msg = e && e.message ? e.message : String(e)
    try {
      fs.writeFileSync(path.join(workDir, `openai_error_attempt_${attemptIndex}.txt`), msg, 'utf8')
    } catch (_) {
      // Best effort: failing to write the log must not mask the original error.
    }
    return { ok: false, error: msg, model }
  }
}
|
|
|
+
|
|
|
+/*
|
|
|
+ * ---------- 旧 callOpenAiRoi(ROI + 模板几何,NCC 流程用,保留勿删)----------
|
|
|
+async function callOpenAiRoi (workDir, screenshotPath, templatePath) {
|
|
|
+ syncProcessEnvFromAppConfig()
|
|
|
+ const openAiKey = String(aiPackageConfig.API_KEY || '').trim()
|
|
|
+
|
|
|
+ const screenUrl = fileToDataUrlPng(screenshotPath)
|
|
|
+ const tplUrl = fileToDataUrlPng(templatePath)
|
|
|
+ const imageUrls = [screenUrl, tplUrl]
|
|
|
+
|
|
|
+ const emptyFallback = {
|
|
|
+ roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 },
|
|
|
+ need_center_crop: false,
|
|
|
+ confidence: 0,
|
|
|
+ template_scale: 1,
|
|
|
+ template_crop: { cx0: 0, cy0: 0, cx1: 1, cy1: 1 },
|
|
|
+ template_vlm_preprocessed: false,
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ if (!openAiKey) {
|
|
|
+ return { ok: false, error: '缺少 OpenAI 兼容 API_KEY', fallback: emptyFallback }
|
|
|
+ }
|
|
|
+ const openAiModel = getPrimaryOpenAiImgCenterModel()
|
|
|
+ const result = await aiModule.run('img2text', VLM_USER_PROMPT, imageUrls, {
|
|
|
+ timeoutMs: IMG_CENTER_AI_TIMEOUT_MS,
|
|
|
+ model: openAiModel,
|
|
|
+ })
|
|
|
+ if (!result.success) {
|
|
|
+ return { ok: false, error: result.error || 'VLM 请求失败', fallback: emptyFallback }
|
|
|
+ }
|
|
|
+ const resp = result.data
|
|
|
+ const content = resp?.choices?.[0]?.message?.content
|
|
|
+ const parsed = parseVlmJson(content)
|
|
|
+ if (!parsed || typeof parsed !== 'object') {
|
|
|
+ return { ok: false, error: '无法解析模型返回为 JSON', fallback: emptyFallback }
|
|
|
+ }
|
|
|
+ fs.writeFileSync(path.join(workDir, 'openai_raw.json'), JSON.stringify(resp, null, 2), 'utf8')
|
|
|
+ return { ok: true, data: parsed }
|
|
|
+ } catch (e) {
|
|
|
+ const msg = e && e.message ? e.message : String(e)
|
|
|
+ return { ok: false, error: msg, fallback: emptyFallback }
|
|
|
+ }
|
|
|
+}
|
|
|
+ * ---------- 旧 callOpenAiRoi 结束 ----------
|
|
|
+ */
|
|
|
+
|
|
|
+/*
|
|
|
+ * ---------- 旧 normalizeTemplateGeometry / normalizeVlmPayload / expandRoiHintNormalized / isFullScreenRoiHint(保留勿删)----------
|
|
|
+function normalizeTemplateGeometry (obj) {
|
|
|
+ const c = obj && obj.template_crop
|
|
|
+ let cx0 = 0
|
|
|
+ let cy0 = 0
|
|
|
+ let cx1 = 1
|
|
|
+ let cy1 = 1
|
|
|
+ if (c && typeof c === 'object') {
|
|
|
+ const n = (k, d) => {
|
|
|
+ const v = Number(c[k])
|
|
|
+ return Number.isFinite(v) ? Math.max(0, Math.min(1, v)) : d
|
|
|
+ }
|
|
|
+ cx0 = n('cx0', 0)
|
|
|
+ cy0 = n('cy0', 0)
|
|
|
+ cx1 = n('cx1', 1)
|
|
|
+ cy1 = n('cy1', 1)
|
|
|
+ if (cx1 <= cx0) {
|
|
|
+ cx0 = 0
|
|
|
+ cx1 = 1
|
|
|
+ }
|
|
|
+ if (cy1 <= cy0) {
|
|
|
+ cy0 = 0
|
|
|
+ cy1 = 1
|
|
|
+ }
|
|
|
+ }
|
|
|
+ let sc = Number(obj && obj.template_scale)
|
|
|
+ if (!Number.isFinite(sc) || sc <= 0) sc = 1
|
|
|
+ sc = Math.max(0.05, Math.min(1, sc))
|
|
|
+ return {
|
|
|
+ template_crop: { cx0, cy0, cx1, cy1 },
|
|
|
+ template_scale: sc,
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+function normalizeVlmPayload (obj) {
|
|
|
+ const rh = obj.roi_hint || {}
|
|
|
+ const nums = ['rx0', 'ry0', 'rx1', 'ry1']
|
|
|
+ let bad = false
|
|
|
+ for (const k of nums) {
|
|
|
+ const v = rh[k]
|
|
|
+ if (v === null || v === undefined) bad = true
|
|
|
+ }
|
|
|
+ const tg = normalizeTemplateGeometry(obj)
|
|
|
+ if (bad) {
|
|
|
+ return {
|
|
|
+ roi_hint: { rx0: 0, ry0: 0, rx1: 1, ry1: 1 },
|
|
|
+ need_center_crop: false,
|
|
|
+ confidence: 0,
|
|
|
+ template_scale: tg.template_scale,
|
|
|
+ template_crop: tg.template_crop,
|
|
|
+ template_vlm_preprocessed: false,
|
|
|
+ }
|
|
|
+ }
|
|
|
+ const c = Number(obj.confidence)
|
|
|
+ const conf = Number.isFinite(c) ? Math.max(0, Math.min(1, c)) : 0
|
|
|
+ return {
|
|
|
+ roi_hint: {
|
|
|
+ rx0: Number(rh.rx0),
|
|
|
+ ry0: Number(rh.ry0),
|
|
|
+ rx1: Number(rh.rx1),
|
|
|
+ ry1: Number(rh.ry1),
|
|
|
+ },
|
|
|
+ need_center_crop: !!obj.need_center_crop,
|
|
|
+ confidence: conf,
|
|
|
+ template_scale: tg.template_scale,
|
|
|
+ template_crop: tg.template_crop,
|
|
|
+ template_vlm_preprocessed: false,
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+function expandRoiHintNormalized (rh) {
|
|
|
+ let rx0 = Number(rh.rx0)
|
|
|
+ let ry0 = Number(rh.ry0)
|
|
|
+ let rx1 = Number(rh.rx1)
|
|
|
+ let ry1 = Number(rh.ry1)
|
|
|
+ if (!(rx1 > rx0 && ry1 > ry0)) return rh
|
|
|
+
|
|
|
+ const pad = IMG_CENTER_ROI_PAD
|
|
|
+ rx0 = Math.max(0, rx0 - pad)
|
|
|
+ ry0 = Math.max(0, ry0 - pad)
|
|
|
+ rx1 = Math.min(1, rx1 + pad)
|
|
|
+ ry1 = Math.min(1, ry1 + pad)
|
|
|
+
|
|
|
+ let h = ry1 - ry0
|
|
|
+ const hMin = IMG_CENTER_ROI_MIN_REL_H
|
|
|
+ if (h < hMin) {
|
|
|
+ const deficit = hMin - h
|
|
|
+ const bottomAnchored = ry1 >= 0.88
|
|
|
+ if (bottomAnchored) {
|
|
|
+ ry0 = Math.max(0, ry0 - deficit)
|
|
|
+ h = ry1 - ry0
|
|
|
+ if (h < hMin) {
|
|
|
+ ry1 = Math.min(1, ry0 + hMin)
|
|
|
+ h = ry1 - ry0
|
|
|
+ if (h < hMin) ry0 = Math.max(0, ry1 - hMin)
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ const cy = (ry0 + ry1) / 2
|
|
|
+ ry0 = Math.max(0, cy - hMin / 2)
|
|
|
+ ry1 = Math.min(1, ry0 + hMin)
|
|
|
+ if (ry1 >= 1 - 1e-9) ry0 = Math.max(0, 1 - hMin)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ const wMin = IMG_CENTER_ROI_MIN_REL_W
|
|
|
+ if (wMin > 0) {
|
|
|
+ let w = rx1 - rx0
|
|
|
+ if (w < wMin) {
|
|
|
+ const cx = (rx0 + rx1) / 2
|
|
|
+ rx0 = Math.max(0, cx - wMin / 2)
|
|
|
+ rx1 = Math.min(1, rx0 + wMin)
|
|
|
+ if (rx1 >= 1 - 1e-9) rx0 = Math.max(0, 1 - wMin)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return { rx0, ry0, rx1, ry1 }
|
|
|
+}
|
|
|
+
|
|
|
+function isFullScreenRoiHint (rh) {
|
|
|
+ return (
|
|
|
+ Math.abs(Number(rh.rx0)) < 1e-9 &&
|
|
|
+ Math.abs(Number(rh.ry0)) < 1e-9 &&
|
|
|
+ Math.abs(Number(rh.rx1) - 1) < 1e-9 &&
|
|
|
+ Math.abs(Number(rh.ry1) - 1) < 1e-9
|
|
|
+ )
|
|
|
+}
|
|
|
+ * ---------- 旧几何归一化结束 ----------
|
|
|
+ */
|
|
|
+
|
|
|
+/*
|
|
|
+ * ---------- 旧 getPythonPath / runTemplatePreprocess / waitUntilMatchedWritten(保留勿删)----------
|
|
|
+function getPythonPath () {
|
|
|
+ const base = config.pythonPath?.path || config.pythonVenvPath || path.join(projectRoot, 'python', process.arch === 'arm64' ? 'arm64' : 'x64')
|
|
|
+ const envPy = path.join(base, 'env', 'Scripts', 'python.exe')
|
|
|
+ const scriptsPy = path.join(base, 'Scripts', 'python.exe')
|
|
|
+ const pyEmbedded = path.join(base, 'py', 'python.exe')
|
|
|
+ if (fs.existsSync(envPy)) return envPy
|
|
|
+ if (fs.existsSync(scriptsPy)) return scriptsPy
|
|
|
+ if (fs.existsSync(pyEmbedded)) return pyEmbedded
|
|
|
+ return 'python'
|
|
|
+}
|
|
|
+
|
|
|
+function runTemplatePreprocess (pythonPath, templateAbsPath, workDir) {
|
|
|
+ if (!fs.existsSync(preprocessTemplateScriptPath)) {
|
|
|
+ return { ok: false, error: `未找到 ${preprocessTemplateScriptPath}` }
|
|
|
+ }
|
|
|
+ const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
|
|
|
+ if (process.platform === 'win32') {
|
|
|
+ const pyDir = path.dirname(pythonPath)
|
|
|
+ const pyRoot = path.dirname(path.dirname(pyDir))
|
|
|
+ env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
|
|
|
+ }
|
|
|
+ const r = spawnSync(
|
|
|
+ pythonPath,
|
|
|
+ [preprocessTemplateScriptPath, '--src', templateAbsPath, '--work-dir', workDir],
|
|
|
+ { encoding: 'utf-8', timeout: PYTHON_PREPROCESS_TEMPLATE_TIMEOUT_MS, env, cwd: projectRoot, windowsHide: true }
|
|
|
+ )
|
|
|
+ if (r.status !== 0) {
|
|
|
+ const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || '模板预处理失败'
|
|
|
+ return { ok: false, error: msg }
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || ''
|
|
|
+ const j = JSON.parse(tail)
|
|
|
+ if (!j.success) return { ok: false, error: j.error || '模板预处理失败' }
|
|
|
+ return { ok: true, meta: j }
|
|
|
+ } catch (e) {
|
|
|
+ return { ok: false, error: '模板预处理输出非 JSON' }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+function waitUntilMatchedWritten (absPath, maxMs) {
|
|
|
+ const matchedWaitMs =
|
|
|
+ maxMs != null
|
|
|
+ ? maxMs
|
|
|
+ : (process.env.IMG_MATCH_MATCHED_WAIT_MS
|
|
|
+ ? Math.max(5000, parseInt(process.env.IMG_MATCH_MATCHED_WAIT_MS, 10) || 30000)
|
|
|
+ : 30000)
|
|
|
+ if (!absPath) return true
|
|
|
+ const t0 = Date.now()
|
|
|
+ let lastSize = -1
|
|
|
+ let stableStart = 0
|
|
|
+ const STABLE_MS = 120
|
|
|
+ while (Date.now() - t0 < matchedWaitMs) {
|
|
|
+ try {
|
|
|
+ if (fs.existsSync(absPath)) {
|
|
|
+ const st = fs.statSync(absPath)
|
|
|
+ if (st.size >= 32) {
|
|
|
+ if (st.size === lastSize) {
|
|
|
+ if (Date.now() - stableStart >= STABLE_MS) return true
|
|
|
+ } else {
|
|
|
+ lastSize = st.size
|
|
|
+ stableStart = Date.now()
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (_) {}
|
|
|
+ const until = Date.now() + 35
|
|
|
+ while (Date.now() < until) {}
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ return fs.existsSync(absPath) && fs.statSync(absPath).size >= 32
|
|
|
+ } catch (_) {
|
|
|
+ return false
|
|
|
+ }
|
|
|
+}
|
|
|
+ * ---------- 旧 getPythonPath / 预处理 / Matched 等待结束 ----------
|
|
|
+ */
|
|
|
+
|
|
|
/**
 * Returns the Python executable for this project by delegating to
 * getPythonExeFromConfig with the loaded application config.
 */
function resolvePythonExecutable () {
  const pythonExe = getPythonExeFromConfig(config)
  return pythonExe
}
|
|
|
+
|
|
|
/**
 * Runs the Python marker script to draw the center marker on the screenshot
 * and save the result as `<workDir>/screenshot_center_marked.png`.
 *
 * The radius can be forced with the IMG_CENTER_MARK_RADIUS environment
 * variable (positive integer, passed as `--radius`); otherwise no radius
 * argument is passed and the script chooses one.
 *
 * Failure reporting:
 *  - marker script missing on disk;
 *  - Python could not be started, or the script exceeded
 *    IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS (reported explicitly from the spawn
 *    error instead of collapsing into the generic failure message);
 *  - non-zero exit status (stderr/stdout is returned as the error);
 *  - the last stdout line is JSON with a falsy `success`;
 *  - the last stdout line is not JSON and no output file of at least 32 bytes exists.
 *
 * @param {string} workDir Directory that receives the marked image.
 * @param {string} screenshotPath Source screenshot path.
 * @param {number} centerX Center x in pixels (rounded before being passed on).
 * @param {number} centerY Center y in pixels (rounded before being passed on).
 * @returns {{ ok: boolean, outPath?: string, error?: string }}
 */
function drawCenterMarkOnScreenshot (workDir, screenshotPath, centerX, centerY) {
  if (!fs.existsSync(markCenterScriptPath)) {
    return { ok: false, error: `未找到 ${markCenterScriptPath}` }
  }
  const outPath = path.join(workDir, 'screenshot_center_marked.png')
  const pythonPath = resolvePythonExecutable()
  const args = [
    markCenterScriptPath,
    '--input', screenshotPath,
    '--output', outPath,
    '--x', String(Math.round(centerX)),
    '--y', String(Math.round(centerY)),
  ]
  const rEnv = parseInt(String(process.env.IMG_CENTER_MARK_RADIUS || '').trim(), 10)
  if (Number.isFinite(rEnv) && rEnv > 0) {
    args.push('--radius', String(rEnv))
  }

  const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
  if (process.platform === 'win32') {
    // Prepend the interpreter's directory and the directory two levels above it to PATH.
    const pyDir = path.dirname(pythonPath)
    const pyRoot = path.dirname(path.dirname(pyDir))
    env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
  }

  const r = spawnSync(pythonPath, args, {
    encoding: 'utf-8',
    timeout: IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS,
    env,
    cwd: projectRoot,
    windowsHide: true,
  })
  if (r.error) {
    // spawnSync reports spawn failures (e.g. ENOENT) and timeouts through r.error.
    const error = r.error.code === 'ETIMEDOUT'
      ? `绘制中心点标记超时(${IMG_CENTER_MARK_SCRIPT_TIMEOUT_MS} ms)`
      : `无法运行 Python(${pythonPath}): ${r.error.message}`
    return { ok: false, error }
  }
  if (r.status !== 0) {
    const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || '绘制中心点标记失败'
    return { ok: false, error: msg }
  }

  try {
    const tail = String(r.stdout || '').trim().split('\n').filter(Boolean).pop() || ''
    const j = JSON.parse(tail)
    if (!j.success) return { ok: false, error: j.error || '绘制中心点标记失败' }
  } catch (_) {
    // Non-JSON output is tolerated as long as a plausible image was written.
    if (!fs.existsSync(outPath) || fs.statSync(outPath).size < 32) {
      return { ok: false, error: '标记脚本输出非 JSON 或未写出有效 PNG' }
    }
  }
  return { ok: true, outPath }
}
|
|
|
+
|
|
|
/**
 * Locate the center point of a template image on the device screen with a VLM.
 *
 * Steps: create a per-run work directory, take an ADB screenshot, copy the
 * template next to it, ask each model of the chain in turn for a normalized
 * center point (stopping at the first valid answer), convert it to clamped
 * pixel coordinates, and try to draw a marker on the screenshot.
 *
 * Diagnostic files written into the work directory are best-effort: a failed
 * write never changes the outcome of the pipeline.
 *
 * @param {string} device ADB device id.
 * @param {string} templateAbsPath Path of the template image; must exist.
 * @param {string} [folderPath] Workflow folder passed to resolveWorkflowTmpRoot.
 * @returns {Promise<{
 *   success: boolean,
 *   error?: string,
 *   coordinate?: { x: number, y: number, width: number, height: number },
 *   clickPosition?: { x: number, y: number },
 *   workDir?: string,
 *   markedScreenshotPath?: string | null
 * }>} Result object; file-system and VLM call errors are reported through
 *   `success: false` rather than thrown.
 */
async function runPipeline (device, templateAbsPath, folderPath) {
  if (!device) return { success: false, error: '缺少设备 ID' }
  if (!templateAbsPath || !fs.existsSync(templateAbsPath)) {
    return { success: false, error: `模板不存在: ${templateAbsPath}` }
  }

  const describeError = (e) => (e && e.message ? e.message : String(e))

  // Preparing the work directory can fail (permissions, read-only disk, ...);
  // report that through the result object like every other failure.
  let workDir
  try {
    const tmpRoot = resolveWorkflowTmpRoot(folderPath)
    fs.mkdirSync(tmpRoot, { recursive: true })
    workDir = path.join(tmpRoot, `img-center-${Date.now()}`)
    fs.mkdirSync(workDir, { recursive: true })
  } catch (e) {
    return { success: false, error: `无法创建临时目录: ${describeError(e)}` }
  }

  // Best-effort diagnostics: these files only help debugging, so a failed
  // write (or a value JSON.stringify cannot serialize) is deliberately ignored.
  const writeText = (fileName, text) => {
    try {
      fs.writeFileSync(path.join(workDir, fileName), text, 'utf8')
    } catch (_) {}
  }
  const writeJson = (fileName, value) => {
    try {
      fs.writeFileSync(path.join(workDir, fileName), JSON.stringify(value, null, 2), 'utf8')
    } catch (_) {}
  }

  const screenshotPath = path.join(workDir, 'screenshot.png')
  const templateInWork = path.join(workDir, 'template.png')

  const adbPath = getAdbPath()
  if (!adbScreencapPng(adbPath, device, screenshotPath)) {
    return { success: false, error: 'ADB 截图失败', workDir }
  }
  try {
    fs.copyFileSync(templateAbsPath, templateInWork)
  } catch (e) {
    return { success: false, error: `复制模板失败: ${describeError(e)}`, workDir }
  }

  const dims = readPngIhdrDimensions(screenshotPath)
  if (!dims) {
    return { success: false, error: '无法读取截图 PNG 尺寸(IHDR)', workDir }
  }

  const modelChain = getCenterPointVlmModelChain()
  if (modelChain.length === 0) {
    const err = '未配置可用 VLM 模型'
    writeText('openai_error.txt', err)
    return { success: false, error: err, workDir }
  }

  const attemptLog = []
  let lastError = ''
  /** @type {{ center_rx: number, center_ry: number } | null} */
  let norm = null
  let aiData = null
  let successRaw = null
  let successModel = null

  for (const [i, m] of modelChain.entries()) {
    let ai
    try {
      ai = await callOpenAiCenterPointWithModel(workDir, screenshotPath, templateInWork, m, i)
    } catch (e) {
      // A thrown call counts as a failed request so the remaining models of
      // the chain still get their turn.
      ai = { ok: false, error: describeError(e) }
    }
    const normTry = ai.ok ? parseCenterNormalizedFromVlm(ai.data, dims.width, dims.height) : null
    attemptLog.push({
      index: i,
      model: m,
      requestOk: ai.ok,
      error: ai.ok ? null : ai.error,
      hasValidCenter: !!normTry,
    })
    if (!ai.ok) {
      lastError = ai.error || 'VLM 中心点失败'
      continue
    }
    if (normTry) {
      norm = normTry
      aiData = ai.data
      successRaw = ai.rawResp
      successModel = m
      break
    }
    lastError = '模型未返回有效中心点(需 center_rx/center_ry 或兼容字段)'
  }

  writeJson('vlm_center_model_attempts.json', {
    model_chain: modelChain,
    success_model: successModel,
    attempts: attemptLog,
  })

  if (!norm || !aiData) {
    const reason = String(lastError || 'unknown')
    writeText('openai_error.txt', reason)
    writeText('center_parse_error.txt', reason)
    return { success: false, error: lastError || 'VLM 中心点失败', workDir }
  }

  if (successRaw) {
    writeJson('openai_raw.json', successRaw)
  }
  writeJson('vlm_center_parsed.json', aiData)

  // Normalized [0, 1] coordinates -> pixels, clamped so the point always lies
  // inside the screenshot.
  const px = Math.round(norm.center_rx * dims.width)
  const py = Math.round(norm.center_ry * dims.height)
  const ix = Math.max(0, Math.min(dims.width - 1, px))
  const iy = Math.max(0, Math.min(dims.height - 1, py))

  // The marked screenshot is optional; failing to draw it does not fail the run.
  let markedScreenshotPath = null
  const mark = drawCenterMarkOnScreenshot(workDir, screenshotPath, ix, iy)
  if (mark.ok) {
    markedScreenshotPath = mark.outPath || null
  } else {
    writeText('screenshot_center_mark_error.txt', String(mark.error || 'unknown'))
  }

  writeJson('vlm_center_result.json', {
    center_rx: norm.center_rx,
    center_ry: norm.center_ry,
    pixel_x: ix,
    pixel_y: iy,
    screenshot_width: dims.width,
    screenshot_height: dims.height,
    marked_screenshot: markedScreenshotPath
      ? path.basename(markedScreenshotPath)
      : null,
    marked_screenshot_error: markedScreenshotPath ? null : (mark.error || '未生成标记图'),
  })

  return {
    success: true,
    coordinate: { x: ix, y: iy, width: 1, height: 1 },
    clickPosition: { x: ix, y: iy },
    workDir,
    markedScreenshotPath,
  }
}
|
|
|
+
|
|
|
+/*
|
|
|
+ * ---------- 旧 runPipeline(ROI + 预处理 + NCC,保留勿删)----------
|
|
|
+async function runPipeline_OLD_NCC (device, templateAbsPath, folderPath) {
|
|
|
+ if (!device) return { success: false, error: '缺少设备 ID' }
|
|
|
+ if (!templateAbsPath || !fs.existsSync(templateAbsPath)) {
|
|
|
+ return { success: false, error: `模板不存在: ${templateAbsPath}` }
|
|
|
+ }
|
|
|
+ if (!fs.existsSync(orbScriptPath)) {
|
|
|
+ return { success: false, error: `未找到 ${orbScriptPath}` }
|
|
|
+ }
|
|
|
+ if (!fs.existsSync(preprocessTemplateScriptPath)) {
|
|
|
+ return { success: false, error: `未找到 ${preprocessTemplateScriptPath}` }
|
|
|
+ }
|
|
|
+
|
|
|
+ const tmpRoot = resolveWorkflowTmpRoot(folderPath)
|
|
|
+ fs.mkdirSync(tmpRoot, { recursive: true })
|
|
|
+ const workDir = path.join(tmpRoot, `img-center-${Date.now()}`)
|
|
|
+ fs.mkdirSync(workDir, { recursive: true })
|
|
|
+
|
|
|
+ const screenshotPath = path.join(workDir, 'screenshot.png')
|
|
|
+ const templateInWork = path.join(workDir, 'template.png')
|
|
|
+ const matchedPath = path.join(workDir, 'Matched.png')
|
|
|
+
|
|
|
+ const adbPath = getAdbPath()
|
|
|
+ if (!adbScreencapPng(adbPath, device, screenshotPath)) {
|
|
|
+ return { success: false, error: 'ADB 截图失败' }
|
|
|
+ }
|
|
|
+ fs.copyFileSync(templateAbsPath, templateInWork)
|
|
|
+
|
|
|
+ const ai = await callOpenAiRoi(workDir, screenshotPath, templateInWork)
|
|
|
+ if (!ai.ok) {
|
|
|
+ fs.writeFileSync(path.join(workDir, 'openai_error.txt'), String(ai.error || 'unknown'), 'utf8')
|
|
|
+ return { success: false, error: ai.error || 'VLM ROI 失败', workDir }
|
|
|
+ }
|
|
|
+ let payload = normalizeVlmPayload(ai.data)
|
|
|
+ if (!isFullScreenRoiHint(payload.roi_hint)) {
|
|
|
+ payload = {
|
|
|
+ ...payload,
|
|
|
+ roi_hint: expandRoiHintNormalized(payload.roi_hint),
|
|
|
+ }
|
|
|
+ }
|
|
|
+ fs.writeFileSync(path.join(workDir, 'vlm_roi.json'), JSON.stringify(payload, null, 2), 'utf8')
|
|
|
+
|
|
|
+ const pythonPath = getPythonPath()
|
|
|
+ const prep = runTemplatePreprocess(pythonPath, templateAbsPath, workDir)
|
|
|
+ if (!prep.ok) {
|
|
|
+ fs.copyFileSync(templateAbsPath, templateInWork)
|
|
|
+ try {
|
|
|
+ const vrPath = path.join(workDir, 'vlm_roi.json')
|
|
|
+ const vr = JSON.parse(fs.readFileSync(vrPath, 'utf8'))
|
|
|
+ vr.template_vlm_preprocessed = false
|
|
|
+ delete vr.template_preprocess_paths
|
|
|
+ fs.writeFileSync(vrPath, JSON.stringify(vr, null, 2), 'utf8')
|
|
|
+ } catch (_) {}
|
|
|
+ fs.writeFileSync(path.join(workDir, 'template_preprocess_error.txt'), String(prep.error || ''), 'utf8')
|
|
|
+ }
|
|
|
+
|
|
|
+ const env = { ...process.env, PYTHONIOENCODING: 'utf-8' }
|
|
|
+ if (!String(env.IMG_CENTER_NCC_MIN_SCORE || '').trim()) {
|
|
|
+ env.IMG_CENTER_NCC_MIN_SCORE = String(IMG_CENTER_NCC_MIN_SCORE_DEFAULT)
|
|
|
+ }
|
|
|
+ if (process.platform === 'win32') {
|
|
|
+ const pyDir = path.dirname(pythonPath)
|
|
|
+ const pyRoot = path.dirname(path.dirname(pyDir))
|
|
|
+ env.PATH = [pyDir, pyRoot, process.env.PATH].filter(Boolean).join(path.delimiter)
|
|
|
+ }
|
|
|
+ const r = spawnSync(
|
|
|
+ pythonPath,
|
|
|
+ [orbScriptPath, '--work-dir', workDir],
|
|
|
+ { encoding: 'utf-8', timeout: PYTHON_ORB_SCRIPT_TIMEOUT_MS, env, cwd: projectRoot }
|
|
|
+ )
|
|
|
+ if (r.status !== 0) {
|
|
|
+ const msg = [r.stderr, r.stdout].filter(Boolean).join('\n').trim() || 'NCC 匹配脚本失败'
|
|
|
+ return { success: false, error: msg, workDir }
|
|
|
+ }
|
|
|
+ let out
|
|
|
+ try {
|
|
|
+ out = JSON.parse(r.stdout.trim())
|
|
|
+ } catch (e) {
|
|
|
+ return { success: false, error: `脚本输出非 JSON: ${(r.stdout || '').slice(0, 300)}`, workDir }
|
|
|
+ }
|
|
|
+ if (!out.success) return { success: false, error: out.error || '匹配失败', workDir }
|
|
|
+ if (!waitUntilMatchedWritten(matchedPath, MATCHED_PNG_MAX_WAIT_MS)) {
|
|
|
+ return { success: false, error: `Matched.png 未就绪: ${matchedPath}`, workDir }
|
|
|
+ }
|
|
|
+ return {
|
|
|
+ success: true,
|
|
|
+ coordinate: { x: out.x, y: out.y, width: out.width, height: out.height },
|
|
|
+ clickPosition: { x: out.center_x, y: out.center_y },
|
|
|
+ workDir,
|
|
|
+ }
|
|
|
+}
|
|
|
+ * ---------- 旧 runPipeline 结束 ----------
|
|
|
+ */
|
|
|
+
|
|
|
/**
 * press/locate entry point, awaited from the Electron side.
 *
 * Resolves a relative template path against projectRoot, runs the pipeline and
 * returns only the fields callers of this API consume.
 *
 * @param {string} device ADB device id.
 * @param {string} imagePath Template image path, absolute or relative to projectRoot.
 * @param {string} [folderPath] Current workflow folder (e.g. .../static/process/GenerateNote);
 *   temporary files are written below its tmp/ directory.
 * @returns {Promise<{
 *   success: boolean,
 *   coordinate?: object,
 *   clickPosition?: object,
 *   markedScreenshotPath?: string | null,
 *   error?: string
 * }>}
 */
async function matchImageAndGetCoordinate (device, imagePath, folderPath) {
  // Without this guard a missing path throws inside path.isAbsolute, and an
  // empty string resolves to projectRoot itself (a directory, not a template).
  if (!imagePath || typeof imagePath !== 'string') {
    return { success: false, error: '缺少模板图片路径' }
  }
  const templatePath = path.isAbsolute(imagePath) ? imagePath : path.resolve(projectRoot, imagePath)
  const r = await runPipeline(device, templatePath, folderPath)
  if (!r.success) return { success: false, error: r.error }
  return {
    success: true,
    coordinate: r.coordinate,
    clickPosition: r.clickPosition,
    markedScreenshotPath: r.markedScreenshotPath || null,
  }
}
|
|
|
+
|
|
|
/**
 * Executor for the img-center-point-location tag.
 *
 * Template path resolution:
 *   - absolute paths (or anything containing ':', e.g. a drive letter) are used as-is;
 *   - relative paths containing a separator are joined onto the base directory;
 *   - bare file names are looked up in `<base directory>/resources`.
 * The base directory is folderPath when it is a non-empty string, else projectRoot.
 *
 * @param {{ device: string, template: string, folderPath?: string }} [params]
 * @returns {Promise<{
 *   success: boolean,
 *   error?: string,
 *   center?: { x: number, y: number },
 *   coordinate?: object,
 *   workDir?: string | null,
 *   markedScreenshotPath?: string | null
 * }>} On failure `workDir` (when the pipeline created one) points at the
 *   directory holding the diagnostic files of the run.
 */
async function executeImgCenterPointLocation ({ device, template, folderPath } = {}) {
  if (!device) return { success: false, error: '缺少设备 ID,无法自动获取截图' }
  if (!template || typeof template !== 'string') return { success: false, error: '缺少模板图片路径(inVars[0])' }
  const baseDir = folderPath && typeof folderPath === 'string' ? folderPath : projectRoot
  // path.isAbsolute additionally recognizes platform-specific absolute forms
  // (e.g. UNC or backslash-rooted paths on Windows) that the textual checks miss.
  const isAbsoluteOrDrive = path.isAbsolute(template) || template.startsWith('/') || template.includes(':')
  const hasSubPath = template.includes('/') || template.includes(path.sep)
  let templatePath
  if (isAbsoluteOrDrive) {
    templatePath = template
  } else if (hasSubPath) {
    templatePath = path.join(baseDir, template)
  } else {
    templatePath = path.join(baseDir, 'resources', template)
  }

  const result = await runPipeline(device, templatePath, folderPath)
  if (!result.success) {
    // Keep workDir on failure: that is where the error diagnostics were written.
    return { success: false, error: result.error, workDir: result.workDir || null }
  }
  // Prefer the explicit click position; otherwise use the center of the matched box.
  const center = result.clickPosition || {
    x: result.coordinate.x + result.coordinate.width / 2,
    y: result.coordinate.y + result.coordinate.height / 2,
  }
  return {
    success: true,
    center,
    coordinate: result.coordinate,
    workDir: result.workDir,
    markedScreenshotPath: result.markedScreenshotPath || null,
  }
}
|
|
|
+
|
|
|
+ // Public API of this module.
+ module.exports = {
|
|
|
+ tagName,
|
|
|
+ schema,
|
|
|
+ executeImgCenterPointLocation,
|
|
|
+ matchImageAndGetCoordinate,
|
|
|
+ /** @deprecated Same as matchImageAndGetCoordinate */
|
|
|
+ matchImageAndGetCoordinateAsync: matchImageAndGetCoordinate,
|
|
|
+ /** Resolved model name + the current ai package baseUrl (for tests / debugging) */
|
|
|
+ getImgCenterModel,
|
|
|
+ getImgCenterAiMeta,
|
|
|
+ /** Effective center-point VLM call chain (primary + fallbacks, at most 3) */
|
|
|
+ getCenterPointVlmModelChain,
|
|
|
+}
|