#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Run OCR on an image with the project-local python/RapidOCR and print JSON.

The result is written to stdout as a single JSON object so that the Node
layer can consume it.

Usage 1:
    python ocr.py --image <image path> [--project-root <project root>]
    Output: {"success": true, "text": "recognized text"}
        or  {"success": false, "error": "..."}

Usage 2:
    python ocr.py --image <image path> --find-text "text to find"
                  [--project-root <project root>]
    Looks the text up in the image and returns its center point:
            {"success": true, "x": 123, "y": 456}
        or  {"success": false, "error": "..."}
"""
import sys
import os
import json
import argparse


# Characters folded to a half-width "(" / ")" before matching.  The ASCII
# square/curly brackets were already folded this way; the full-width forms
# (U+FF08, U+FF3B, U+FF5B and their closing counterparts) are folded too, so
# that OCR output using full-width brackets matches a half-width query.
_OPEN_BRACKETS = '([{\uff08\uff3b\uff5b'
_CLOSE_BRACKETS = ')]}\uff09\uff3d\uff5d'

# Translation table for normalize_for_match(), built once at import time.
_NORMALIZE_TABLE = {
    **{ord(ch): '(' for ch in _OPEN_BRACKETS},
    **{ord(ch): ')' for ch in _CLOSE_BRACKETS},
    # Full-width digits U+FF10..U+FF19 -> ASCII '0'..'9'.
    **{cp: chr(cp - 0xFEE0) for cp in range(0xFF10, 0xFF1A)},
    # ASCII space and ideographic space are removed entirely.
    ord(' '): None,
    ord('\u3000'): None,
}


def box_center(box):
    """Return the centroid (cx, cy) of a polygon given as a sequence of points.

    ``box`` is expected to hold at least four ``[x, y]`` points.  Returns
    ``None`` when ``box`` is ``None``, has fewer than four points, or contains
    a point whose coordinates cannot be read as numbers.
    """
    if box is None or len(box) < 4:
        return None
    try:
        xs = [float(p[0]) for p in box]
        ys = [float(p[1]) for p in box]
    except (TypeError, IndexError, ValueError):
        # ValueError covers non-numeric coordinates such as strings.
        return None
    return (sum(xs) / len(xs), sum(ys) / len(ys))


def polys_to_bbox_center(polys):
    """Return the center (cx, cy) of the bounding rectangle of several boxes.

    ``polys`` is a list of boxes, each a sequence of at least four ``[x, y]``
    points.  Boxes that are ``None`` or too short are ignored.  Returns
    ``None`` when no usable point is found.
    """
    if not polys:
        return None
    all_xs, all_ys = [], []
    for box in polys:
        if box is None or len(box) < 4:
            continue
        try:
            for p in box:
                all_xs.append(float(p[0]))
                all_ys.append(float(p[1]))
        except (TypeError, IndexError, ValueError):
            # Stop reading this box at the first malformed point; values
            # appended before the failure are kept.
            continue
    if not all_xs or not all_ys:
        return None
    cx = (min(all_xs) + max(all_xs)) / 2
    cy = (min(all_ys) + max(all_ys)) / 2
    return (cx, cy)


def normalize_for_match(s):
    """Normalize text for matching.

    Strips surrounding whitespace, removes ASCII and ideographic spaces,
    folds every bracket listed in ``_OPEN_BRACKETS`` / ``_CLOSE_BRACKETS``
    (half-width and full-width) to ``(`` / ``)``, and converts full-width
    digits to ASCII digits.  A falsy input yields ``''``.
    """
    if not s:
        return ''
    return s.strip().translate(_NORMALIZE_TABLE)


def _emit(payload):
    """Print ``payload`` to stdout as one JSON object, keeping non-ASCII text."""
    print(json.dumps(payload, ensure_ascii=False))


def _poly_at(boxes, index):
    """Return ``boxes[index]`` as a plain list, or ``None`` if it is unavailable."""
    if index >= len(boxes):
        return None
    poly = boxes[index]
    if poly is None:
        return None
    # Objects exposing tolist() (e.g. array-like boxes) are converted with it.
    if hasattr(poly, 'tolist'):
        return poly.tolist()
    return list(poly)


def _find_center(find_text, txts, boxes):
    """Locate ``find_text`` among the OCR fragments and return its center.

    Matching passes, in order; the first pass that yields a center wins:
      1. a single fragment equals the query,
      2. a single fragment contains the query,
      3. consecutive fragments concatenated equal the query,
      4. consecutive fragments concatenated contain the query
         (so "next(2)" in the image still matches the query "next").
    Every comparison is tried on the raw text and on the normalized text.

    Returns ``(cx, cy)`` or ``None`` when nothing matches.
    """
    n = min(len(boxes), len(txts))
    find_norm = normalize_for_match(find_text)

    def equals(text):
        return text == find_text or (
            bool(find_norm) and normalize_for_match(text) == find_norm
        )

    def contains(text):
        return find_text in text or (
            bool(find_norm) and find_norm in normalize_for_match(text)
        )

    # Passes 1 and 2: single fragments.
    for matches in (equals, contains):
        for i in range(n):
            if not matches(txts[i]):
                continue
            center = box_center(_poly_at(boxes, i))
            if center is not None:
                return center

    # Passes 3 and 4: runs of consecutive fragments, merged bounding box.
    for matches in (equals, contains):
        for start in range(n):
            for end in range(start + 1, n + 1):
                if not matches(''.join(txts[start:end])):
                    continue
                polys = [_poly_at(boxes, j) for j in range(start, end)]
                polys = [p for p in polys if p is not None and len(p) >= 4]
                center = polys_to_bbox_center(polys)
                if center is not None:
                    return center
                # Matched but no usable geometry: give up on this start index.
                break
    return None


def main():
    """Command-line entry point: parse arguments, run OCR, print a JSON result.

    Exit status is 1 when RapidOCR cannot be imported, when OCR raises, or
    when ``--find-text`` is given and the text is not found.  A missing image
    file reports ``success: false`` but returns normally (exit status 0).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--image', required=True, help='图片路径(绝对或相对)')
    ap.add_argument('--find-text', default=None, help='要查找的文字;若指定则返回该文字在图中的中心点 x,y')
    ap.add_argument('--project-root', default=None, help='项目根目录,用于解析相对路径及加载 RapidOCR')
    args = ap.parse_args()

    # Default project root: two directories above this script.
    project_root = args.project_root
    if not project_root:
        project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
    project_root = os.path.normpath(project_root)

    # Make the bundled RapidOCR importable ahead of any installed copy.
    rapidocr_python = os.path.join(project_root, 'python', 'RapidOCR', 'python')
    if os.path.isdir(rapidocr_python) and rapidocr_python not in sys.path:
        sys.path.insert(0, rapidocr_python)

    # Relative image paths are resolved against the project root.
    image_path = args.image
    if not os.path.isabs(image_path):
        image_path = os.path.normpath(os.path.join(project_root, image_path))
    if not os.path.isfile(image_path):
        _emit({'success': False, 'error': f'图片不存在: {image_path}'})
        return

    try:
        from rapidocr import RapidOCR
    except ImportError as e:
        _emit({
            'success': False,
            'error': f'RapidOCR 导入失败: {str(e).strip()}。请确保 python/RapidOCR 存在且安装依赖(如 pip install onnxruntime;或 cd python/RapidOCR/python && pip install -e .)。',
        })
        sys.exit(1)

    try:
        find_text = (args.find_text or '').strip()
        engine = RapidOCR()
        result = engine(image_path)

        if result.boxes is None or result.txts is None or len(result.txts) == 0:
            if find_text:
                _emit({'success': False, 'error': f'图中未识别到文字,或未找到: "{find_text}"'})
                sys.exit(1)
            _emit({'success': True, 'text': ''})
            return

        boxes = result.boxes
        txts = [str(t).strip() if t is not None else '' for t in result.txts]

        if find_text:
            center = _find_center(find_text, txts, boxes)
            if center is None:
                _emit({'success': False, 'error': f'图中未找到文字: "{find_text}"'})
                sys.exit(1)
            _emit({'success': True, 'x': int(round(center[0])), 'y': int(round(center[1]))})
        else:
            _emit({'success': True, 'text': '\n'.join(txts)})
    except Exception as e:
        # Top-level boundary: any OCR failure is reported as JSON.  SystemExit
        # raised by sys.exit() above is not an Exception and passes through.
        _emit({'success': False, 'error': str(e).strip()})
        sys.exit(1)


if __name__ == '__main__':
    main()