#!/usr/bin/env python3
# Source repository: yichael/xhs-note-crawling
"""
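Clear the search input when its DOM value is non-empty: take a full-screen
screenshot, use OCR to locate the text block matching that value and treat it
as the anchor, then among all "X / ×" candidates pick the one whose center has
the smallest Euclidean distance to the anchor's center, move the mouse there
and click to clear the input.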
"""

from __future__ import annotations

import importlib.util
import math
import sys
from pathlib import Path

import numpy as np

# Directory three levels above this file (presumably the repository root —
# TODO confirm). It is put at the front of ``sys.path``, if not already
# present, so that the ``workplace`` import below can be resolved.
_ROOT = Path(__file__).resolve().parent.parent.parent
if str(_ROOT) not in sys.path:
    sys.path.insert(0, str(_ROOT))

# Must come after the sys.path adjustment above, hence the E402 suppression.
from workplace import pyautogui as workplace_pyautogui  # noqa: E402


def _load_ocr_pos_module():
    """Load ``ocr-pos.py`` from the parent of this file's directory as a module.

    The file name contains a hyphen, so it cannot be imported with a regular
    ``import`` statement and is loaded from its path instead.  The module is
    registered in ``sys.modules`` under a private name, so repeated calls
    return the same module object instead of re-executing the file (and
    throwing away whatever state that module keeps at module level).

    Returns:
        The loaded module object.

    Raises:
        ImportError: If no import spec or loader can be built for the path.
        Exception: Anything raised while executing ``ocr-pos.py`` propagates
            unchanged (e.g. ``FileNotFoundError`` when the file is missing).
    """
    module_name = "workplace_ocr_pos_clear_input"

    cached = sys.modules.get(module_name)
    if cached is not None:
        return cached

    path = Path(__file__).resolve().parent.parent / "ocr-pos.py"
    spec = importlib.util.spec_from_file_location(module_name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load {path}")
    mod = importlib.util.module_from_spec(spec)

    # Register before executing, as the importlib recipe does, so code in the
    # loaded file that looks itself up via ``sys.modules`` works.
    sys.modules[module_name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Never leave a half-initialised module cached.
        sys.modules.pop(module_name, None)
        raise
    return mod


def _line_matches_anchor_dom_text(ocr_mod, ocr_line: str, anchor_norm: str) -> bool:
    """Tell whether an OCR line corresponds to the already-normalized anchor text.

    The OCR line is normalized with ``ocr_mod._normalize_ocr_match_string`` so
    both sides use the same normalization.  An anchor of two or more
    characters matches as a substring; a single-character anchor must equal
    the whole normalized line, so that e.g. "x" does not match inside a long
    sentence.  An empty anchor never matches.
    """
    normalized_line = ocr_mod._normalize_ocr_match_string(ocr_line)
    if not anchor_norm:
        return False
    if len(anchor_norm) < 2:
        # Single character: require the entire line to be that character.
        return normalized_line == anchor_norm
    return anchor_norm in normalized_line


def _line_is_clear_x_glyph(ocr_mod, ocr_line: str) -> bool:
    """Return True when the OCR line, normalized and lower-cased, is exactly "x" or "×"."""
    glyph = ocr_mod._normalize_ocr_match_string(ocr_line)
    glyph = glyph.lower()
    return glyph == "x" or glyph == "×"


def _ocr_screen_xy_nearest_x_to_anchor_text(
    image_source_bgr_numpy_or_path: np.ndarray | str | Path,
    anchor_dom_text: str,
    ocr_mod,
) -> tuple[int, int]:
    """Find the "X / ×" glyph closest to the OCR block matching the input text.

    Steps:
      1. Run the OCR engine from ``ocr_mod`` on the given image source.
      2. Collect every OCR line matching ``anchor_dom_text`` (see
         ``_line_matches_anchor_dom_text``) and keep the one whose
         axis-aligned bounding box has the smallest area as the anchor.
      3. Collect every line that is a lone "x"/"×" glyph and pick the one
         whose center is nearest (Euclidean distance) to the anchor center.

    Args:
        image_source_bgr_numpy_or_path: Image handed to
            ``ocr_mod._rapidocr_input_from_path_bytes_or_bgr_numpy``.
        anchor_dom_text: Text currently in the input box (un-normalized).
        ocr_mod: Module providing ``_get_rapid_ocr``,
            ``_rapidocr_input_from_path_bytes_or_bgr_numpy``,
            ``_normalize_ocr_match_string`` and ``_quad_center_xy``.

    Returns:
        ``(x, y)`` of the chosen glyph's center, rounded to integers.

    Raises:
        RuntimeError: If OCR yields no text, the anchor text normalizes to an
            empty string, no line matches the anchor, or no X glyph is found.
    """
    engine = ocr_mod._get_rapid_ocr()
    ocr_input = ocr_mod._rapidocr_input_from_path_bytes_or_bgr_numpy(
        image_source_bgr_numpy_or_path
    )
    result = engine(ocr_input)
    if result is None or not result.txts or result.boxes is None:
        raise RuntimeError("OCR 未返回任何文字")

    anchor_norm = ocr_mod._normalize_ocr_match_string(anchor_dom_text)
    if not anchor_norm:
        raise RuntimeError("输入框文案归一后为空")

    anchor_candidates: list[int] = [
        idx
        for idx, line in enumerate(result.txts)
        if _line_matches_anchor_dom_text(ocr_mod, line, anchor_norm)
    ]
    if not anchor_candidates:
        raise RuntimeError("OCR 未在截图中找到与输入框文案匹配的文字")

    def _bounding_area(idx: int) -> float:
        # Area of the axis-aligned rectangle enclosing the box's points.
        points = result.boxes[idx]
        xs = [float(pt[0]) for pt in points]
        ys = [float(pt[1]) for pt in points]
        return (max(xs) - min(xs)) * (max(ys) - min(ys))

    # The smallest matching box is taken as the anchor.
    anchor_idx = min(anchor_candidates, key=_bounding_area)
    anchor_x, anchor_y = ocr_mod._quad_center_xy(result.boxes[anchor_idx])

    glyph_candidates = [
        idx
        for idx, line in enumerate(result.txts)
        if _line_is_clear_x_glyph(ocr_mod, line)
    ]
    if not glyph_candidates:
        raise RuntimeError("OCR 未在截图中找到清除按钮 X")

    def _distance_to_anchor(idx: int) -> float:
        glyph_x, glyph_y = ocr_mod._quad_center_xy(result.boxes[idx])
        return math.hypot(anchor_x - glyph_x, anchor_y - glyph_y)

    nearest_idx = min(glyph_candidates, key=_distance_to_anchor)
    click_x, click_y = ocr_mod._quad_center_xy(result.boxes[nearest_idx])
    return int(round(click_x)), int(round(click_y))


def start(
    page,
    *,
    selector_search_input: str,
    full_screen_screenshot_capture_saver,
    move_mouse_to_pos,
    poll_interval_sec: float = 0.35,
    preset_clear_button_xy: tuple[int, int] | None = None,
) -> tuple[int, int] | None:
    """Clear the search input by clicking its on-screen "X / ×" button.

    Nothing happens (and ``None`` is returned) when the first element matching
    ``selector_search_input`` does not exist or its value is empty after
    stripping whitespace.  Otherwise the clear button is located either from
    ``preset_clear_button_xy`` (no screenshot, no OCR) or by taking a
    full-screen screenshot and choosing the X glyph nearest to the OCR block
    of the input's text (see ``_line_is_clear_x_glyph``).  The mouse is then
    moved to that point and clicked.

    Args:
        page: Object exposing ``locator(selector).first`` with ``count()`` and
            ``input_value(timeout=...)`` (presumably a Playwright page —
            TODO confirm).
        selector_search_input: Selector of the search input element.
        full_screen_screenshot_capture_saver: Object whose
            ``capture_full_screen_and_store_in_memory()`` returns the
            screenshot passed to OCR.
        move_mouse_to_pos: Object whose ``start(x, y)`` moves the pointer.
        poll_interval_sec: Passed as ``base_sec`` to the post-click sleep.
        preset_clear_button_xy: Known screen coordinates of the clear button;
            when given, screenshot and OCR are skipped.

    Returns:
        Screen coordinates ``(x, y)`` that were clicked, or ``None`` when no
        clearing was performed.
    """
    search_input = page.locator(selector_search_input).first
    if search_input.count() == 0:
        return None
    current_text = str(search_input.input_value(timeout=5_000)).strip()
    if not current_text:
        return None

    if preset_clear_button_xy is None:
        # No known position: locate the button on a fresh screenshot via OCR.
        ocr_mod = _load_ocr_pos_module()
        screenshot = (
            full_screen_screenshot_capture_saver.capture_full_screen_and_store_in_memory()
        )
        target_x, target_y = _ocr_screen_xy_nearest_x_to_anchor_text(
            screenshot,
            current_text,
            ocr_mod,
        )
    else:
        target_x = int(preset_clear_button_xy[0])
        target_y = int(preset_clear_button_xy[1])

    # Shared tail: move, short pause, click, then wait before returning.
    move_mouse_to_pos.start(target_x, target_y)
    workplace_pyautogui.sleep_human_pre_click_after_pointer_move()
    workplace_pyautogui.click_here()
    workplace_pyautogui.sleep_human_extra_after_dom_click_poll(
        base_sec=poll_interval_sec,
    )
    return (target_x, target_y)


__all__ = ["start"]


if __name__ == "__main__":
    # This module is meant to be called through ``start`` from another script;
    # running it directly only reports that hint and exits.
    _direct_run_hint = "请通过 workplace/input-keyword/input-keyword.py 调用 start。"
    raise SystemExit(_direct_run_hint)