cdp_chrome_browser_core.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
#!/usr/bin/env python3
"""Attach to, and launch, Chrome with remote debugging enabled."""
  3. from __future__ import annotations
  4. import socket
  5. import subprocess
  6. import sys
  7. from pathlib import Path
  8. from typing import Any
  9. from urllib.parse import urlparse
# Absolute, symlink-resolved path of this module's own source file.
_SINGLETON_MODULE_SOURCE_FILE_PATH = Path(__file__).resolve()
# Two levels above this file, i.e. the parent of the directory that contains
# this module; exposed to callers through ``repo_root()``.
_REPOSITORY_ROOT_DIRECTORY = _SINGLETON_MODULE_SOURCE_FILE_PATH.parent.parent
  12. from workplace import pyautogui as _human_timing # noqa: E402
  13. def repo_root() -> Path:
  14. return _REPOSITORY_ROOT_DIRECTORY
  15. def _is_blank_url(page_url: str) -> bool:
  16. normalized_url = (page_url or "").strip().lower()
  17. if normalized_url in ("", "about:blank"):
  18. return True
  19. if normalized_url in ("chrome://newtab/", "chrome://new-tab-page/"):
  20. return True
  21. if normalized_url.startswith("chrome://new-tab-page"):
  22. return True
  23. return False
  24. def cdp_endpoint_open(cdp_browser_http_url: str) -> bool:
  25. raw = (cdp_browser_http_url or "").strip()
  26. if not raw:
  27. return False
  28. parsed = urlparse(raw)
  29. hostname = parsed.hostname or "127.0.0.1"
  30. port = parsed.port
  31. if port is None:
  32. scheme = (parsed.scheme or "http").lower()
  33. port = 443 if scheme == "https" else 80
  34. socket_handle = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  35. socket_handle.settimeout(2.0)
  36. connect_result = socket_handle.connect_ex((hostname, port))
  37. socket_handle.close()
  38. return connect_result == 0
  39. def _remote_debugging_tcp_port_from_http_url(cdp_browser_http_url: str) -> int:
  40. parsed = urlparse((cdp_browser_http_url or "").strip())
  41. if parsed.port is not None:
  42. return int(parsed.port)
  43. scheme = (parsed.scheme or "http").lower()
  44. return 443 if scheme == "https" else 80
  45. def _resolve_chrome_executable_path() -> Path:
  46. import os
  47. chrome_path_environment = os.environ.get("CHROME_PATH", "").strip()
  48. if chrome_path_environment:
  49. candidate_path = Path(chrome_path_environment)
  50. if candidate_path.is_file():
  51. return candidate_path
  52. program_files = Path(os.environ.get("ProgramFiles", r"C:\Program Files"))
  53. program_files_x86 = Path(os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)"))
  54. local_app_data = Path(os.environ.get("LOCALAPPDATA", ""))
  55. search_list = [
  56. program_files / "Google" / "Chrome" / "Application" / "chrome.exe",
  57. program_files_x86 / "Google" / "Chrome" / "Application" / "chrome.exe",
  58. local_app_data / "Google" / "Chrome" / "Application" / "chrome.exe",
  59. ]
  60. for chrome_executable_candidate in search_list:
  61. if chrome_executable_candidate.is_file():
  62. return chrome_executable_candidate
  63. return program_files / "Google" / "Chrome" / "Application" / "chrome.exe"
  64. def _start_chrome_process_with_remote_debugging_port(
  65. *,
  66. chrome_user_data_directory_path: Path,
  67. remote_debugging_tcp_port: int,
  68. ) -> None:
  69. chrome_executable_path = _resolve_chrome_executable_path()
  70. command_argument_list = [
  71. str(chrome_executable_path),
  72. f"--remote-debugging-port={remote_debugging_tcp_port}",
  73. f"--user-data-dir={chrome_user_data_directory_path}",
  74. "--no-first-run",
  75. "--no-default-browser-check",
  76. ]
  77. creation_flags = 0
  78. if sys.platform == "win32":
  79. creation_flags = subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
  80. subprocess.Popen(
  81. command_argument_list,
  82. stdout=subprocess.DEVNULL,
  83. stderr=subprocess.DEVNULL,
  84. creationflags=creation_flags,
  85. )
  86. def connect_cdp(
  87. playwright_inst: Any,
  88. cdp_browser_http_url: str,
  89. *,
  90. auto_chrome: bool,
  91. profile_dir: Path,
  92. ) -> Any:
  93. normalized_cdp_url = (cdp_browser_http_url or "").strip() or "http://127.0.0.1:9222"
  94. debugging_endpoint_ready = cdp_endpoint_open(normalized_cdp_url)
  95. if not debugging_endpoint_ready and auto_chrome:
  96. remote_debugging_tcp_port = _remote_debugging_tcp_port_from_http_url(
  97. normalized_cdp_url,
  98. )
  99. _start_chrome_process_with_remote_debugging_port(
  100. chrome_user_data_directory_path=profile_dir,
  101. remote_debugging_tcp_port=remote_debugging_tcp_port,
  102. )
  103. _human_timing.sleep_human_chrome_debug_process_ready()
  104. browser = playwright_inst.chromium.connect_over_cdp(normalized_cdp_url)
  105. return browser
  106. def pick_target_page(browser: Any) -> Any:
  107. """优先选用 **空白 / 新标签页**,避免多标签时误附着到同站非首页而跳过 ``goto`` 主页。"""
  108. candidates: list[Any] = []
  109. for browser_context in browser.contexts:
  110. for page in browser_context.pages:
  111. if page is not None and not page.is_closed():
  112. candidates.append(page)
  113. if not candidates:
  114. if browser.contexts:
  115. return browser.contexts[0].new_page()
  116. return None
  117. for page in candidates:
  118. if _is_blank_url(page.url or ""):
  119. return page
  120. return candidates[0]
# Names exported by a star-import of this module. The underscore-prefixed
# ``_is_blank_url`` is listed here, so it is exported alongside the public helpers.
__all__ = [
    "_is_blank_url",
    "cdp_endpoint_open",
    "connect_cdp",
    "pick_target_page",
    "repo_root",
]