clear-input.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. #!/usr/bin/env python3
  2. """
  3. 若搜索输入框 DOM 中有非空文案:全屏截图,OCR 在图中找到与文案对应的文字块作锚点,
  4. 再在多个「X / ×」候选中取与锚点中心欧氏距离最近的一点,移鼠并点击以清空。
  5. """
  6. from __future__ import annotations
  7. import importlib.util
  8. import math
  9. import sys
  10. from pathlib import Path
  11. import numpy as np
  12. _ROOT = Path(__file__).resolve().parent.parent.parent
  13. if str(_ROOT) not in sys.path:
  14. sys.path.insert(0, str(_ROOT))
  15. from workplace import pyautogui as workplace_pyautogui # noqa: E402
  16. def _load_ocr_pos_module():
  17. path = Path(__file__).resolve().parent.parent / "ocr-pos.py"
  18. spec = importlib.util.spec_from_file_location("workplace_ocr_pos_clear_input", path)
  19. if spec is None or spec.loader is None:
  20. raise ImportError(f"Cannot load {path}")
  21. mod = importlib.util.module_from_spec(spec)
  22. spec.loader.exec_module(mod)
  23. return mod
  24. def _line_matches_anchor_dom_text(ocr_mod, ocr_line: str, anchor_norm: str) -> bool:
  25. """与 ocr-pos 空白归一一致;单字锚点要求整行一致,避免「x」误匹配长句。"""
  26. norm = ocr_mod._normalize_ocr_match_string(ocr_line)
  27. if not anchor_norm:
  28. return False
  29. if len(anchor_norm) >= 2:
  30. return anchor_norm in norm
  31. return norm == anchor_norm
  32. def _line_is_clear_x_glyph(ocr_mod, ocr_line: str) -> bool:
  33. n = ocr_mod._normalize_ocr_match_string(ocr_line).lower()
  34. return n in ("x", "×")
  35. def _ocr_screen_xy_nearest_x_to_anchor_text(
  36. image_source_bgr_numpy_or_path: np.ndarray | str | Path,
  37. anchor_dom_text: str,
  38. ocr_mod,
  39. ) -> tuple[int, int]:
  40. ocr = ocr_mod._get_rapid_ocr()
  41. result = ocr(
  42. ocr_mod._rapidocr_input_from_path_bytes_or_bgr_numpy(
  43. image_source_bgr_numpy_or_path
  44. )
  45. )
  46. if result is None or not result.txts or result.boxes is None:
  47. raise RuntimeError("OCR 未返回任何文字")
  48. anchor_norm = ocr_mod._normalize_ocr_match_string(anchor_dom_text)
  49. if not anchor_norm:
  50. raise RuntimeError("输入框文案归一后为空")
  51. anchor_indices: list[int] = []
  52. for i, txt in enumerate(result.txts):
  53. if _line_matches_anchor_dom_text(ocr_mod, txt, anchor_norm):
  54. anchor_indices.append(i)
  55. if not anchor_indices:
  56. raise RuntimeError("OCR 未在截图中找到与输入框文案匹配的文字")
  57. def _box_area(idx: int) -> float:
  58. box = result.boxes[idx]
  59. xs = [float(p[0]) for p in box]
  60. ys = [float(p[1]) for p in box]
  61. w = max(xs) - min(xs)
  62. h = max(ys) - min(ys)
  63. return w * h
  64. best_anchor_i = min(anchor_indices, key=_box_area)
  65. ax, ay = ocr_mod._quad_center_xy(result.boxes[best_anchor_i])
  66. x_indices = [i for i, t in enumerate(result.txts) if _line_is_clear_x_glyph(ocr_mod, t)]
  67. if not x_indices:
  68. raise RuntimeError("OCR 未在截图中找到清除按钮 X")
  69. best_x_i = min(
  70. x_indices,
  71. key=lambda i: math.hypot(
  72. ax - ocr_mod._quad_center_xy(result.boxes[i])[0],
  73. ay - ocr_mod._quad_center_xy(result.boxes[i])[1],
  74. ),
  75. )
  76. cx, cy = ocr_mod._quad_center_xy(result.boxes[best_x_i])
  77. return int(round(cx)), int(round(cy))
  78. def start(
  79. page,
  80. *,
  81. selector_search_input: str,
  82. full_screen_screenshot_capture_saver,
  83. move_mouse_to_pos,
  84. poll_interval_sec: float = 0.35,
  85. preset_clear_button_xy: tuple[int, int] | None = None,
  86. ) -> tuple[int, int] | None:
  87. """
  88. 若 ``selector_search_input`` 对应输入框存在且 ``input_value`` 非空:截图、
  89. 取离该文案 OCR 块最近的「X / ×」(见 ``_line_is_clear_x_glyph``)并点击。
  90. 若传入 ``preset_clear_button_xy`` 则跳过截图与 OCR,直接移鼠到该点并点击。
  91. 返回该清除钮的屏幕坐标 ``(x, y)``;未执行清空时返回 ``None``。
  92. """
  93. loc = page.locator(selector_search_input).first
  94. if loc.count() == 0:
  95. return None
  96. raw = str(loc.input_value(timeout=5_000)).strip()
  97. if not raw:
  98. return None
  99. if preset_clear_button_xy is not None:
  100. cx, cy = int(preset_clear_button_xy[0]), int(preset_clear_button_xy[1])
  101. move_mouse_to_pos.start(cx, cy)
  102. workplace_pyautogui.sleep_human_pre_click_after_pointer_move()
  103. workplace_pyautogui.click_here()
  104. workplace_pyautogui.sleep_human_extra_after_dom_click_poll(
  105. base_sec=poll_interval_sec,
  106. )
  107. return (cx, cy)
  108. ocr_mod = _load_ocr_pos_module()
  109. full_screen_screenshot_bgr_numpy = (
  110. full_screen_screenshot_capture_saver.capture_full_screen_and_store_in_memory()
  111. )
  112. clear_screen_x, clear_screen_y = _ocr_screen_xy_nearest_x_to_anchor_text(
  113. full_screen_screenshot_bgr_numpy,
  114. raw,
  115. ocr_mod,
  116. )
  117. move_mouse_to_pos.start(clear_screen_x, clear_screen_y)
  118. workplace_pyautogui.sleep_human_pre_click_after_pointer_move()
  119. workplace_pyautogui.click_here()
  120. workplace_pyautogui.sleep_human_extra_after_dom_click_poll(
  121. base_sec=poll_interval_sec,
  122. )
  123. return (clear_screen_x, clear_screen_y)
  124. __all__ = ["start"]
  125. if __name__ == "__main__":
  126. raise SystemExit(
  127. "请通过 workplace/input-keyword/input-keyword.py 调用 start。",
  128. )