1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
| import time from dataclasses import dataclass from typing import List, Tuple, Optional
import cv2 import numpy as np import pytesseract from mss import mss import pyautogui
@dataclass
class Match:
    """A single on-screen detection: where it is and how good it is.

    Instances are created by the locators in this file and consumed by the
    click helpers, which only read ``box``.
    """

    # Rectangle as (x, y, width, height); x/y is the top-left corner.
    box: Tuple[int, int, int, int]
    # Similarity / confidence of the detection; larger means a better hit.
    score: float
class Screen:
    """Screenshot source backed by an ``mss`` session.

    The session is opened in ``__init__`` and kept in ``self.sct``.  Call
    :meth:`close` (or use the instance in a ``with`` statement) to release
    it once no more screenshots are needed.
    """

    def __init__(self):
        self.sct = mss()

    def __enter__(self) -> "Screen":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def close(self) -> None:
        """Release the underlying ``mss`` session."""
        self.sct.close()

    def grab(self, region: Optional[Tuple[int, int, int, int]] = None) -> np.ndarray:
        """Capture the screen and return it as an RGB image array.

        Args:
            region: ``(left, top, width, height)`` of the area to capture.
                When ``None`` the monitor at ``self.sct.monitors[1]`` is
                captured in full.

        Returns:
            A contiguous ``height x width x 3`` array with channels in
            R, G, B order.  Pixel (0, 0) is the top-left corner of the
            captured area, so coordinates derived from the image are
            relative to that area, not to the whole desktop.
        """
        if region is None:
            target = self.sct.monitors[1]
        else:
            left, top, width, height = region
            target = {"left": left, "top": top, "width": width, "height": height}

        shot = self.sct.grab(target)
        pixels = np.asarray(shot)
        # Keep the first three channels in reverse order (mss delivers BGRA,
        # so this yields RGB).  The reversed slice is a negative-stride view;
        # copying it into a contiguous buffer keeps the result usable with
        # OpenCV routines that write into their input.
        return np.ascontiguousarray(pixels[:, :, 2::-1])
class Locator:
    """Locates UI elements on screenshots via template matching or OCR.

    All returned boxes are expressed in the coordinate system of the image
    that was grabbed: when a ``region`` is passed, coordinates are relative
    to that region's top-left corner and are NOT shifted back to absolute
    screen coordinates.
    """

    # Fixed score attached to every OCR hit.
    _OCR_SCORE = 0.9

    def __init__(self, screen: "Screen"):
        self.screen = screen

    def match_template(self, template: np.ndarray, region=None,
                       scales=(0.8, 1.0, 1.2), threshold=0.82) -> List["Match"]:
        """Search the screen for ``template`` at several scales.

        Args:
            template: Image to look for; must be compatible with the RGB
                screenshot returned by ``self.screen.grab``.
            region: Optional ``(left, top, width, height)`` capture area,
                forwarded to ``self.screen.grab``.
            scales: Resize factors applied to ``template`` before matching.
            threshold: Minimum ``TM_CCOEFF_NORMED`` response to accept.

        Returns:
            Non-overlapping matches (IoU-suppressed at 0.3), best score
            first.  Empty list when nothing reaches ``threshold``.
        """
        img = self.screen.grab(region)
        img_h, img_w = img.shape[:2]
        tpl_h, tpl_w = template.shape[:2]

        matches: List["Match"] = []
        corners = []
        for scale in scales:
            # Skip scales that cannot be matched at all: a template that
            # collapses to zero pixels makes cv2.resize fail, and one larger
            # than the screenshot makes cv2.matchTemplate fail.
            est_w = int(round(tpl_w * scale))
            est_h = int(round(tpl_h * scale))
            if est_w < 1 or est_h < 1 or est_w > img_w or est_h > img_h:
                continue

            scaled = cv2.resize(template, None, fx=scale, fy=scale,
                                interpolation=cv2.INTER_AREA)
            h, w = scaled.shape[:2]
            if w > img_w or h > img_h:
                # Guard against rounding differences in the estimate above.
                continue

            res = cv2.matchTemplate(img, scaled, cv2.TM_CCOEFF_NORMED)
            ys, xs = np.where(res >= threshold)
            # tolist() yields plain Python ints so Match.box holds no
            # numpy scalars.
            for y, x in zip(ys.tolist(), xs.tolist()):
                matches.append(Match((x, y, w, h), float(res[y, x])))
                corners.append((x, y, x + w, y + h))

        if not matches:
            return []

        scores = np.array([m.score for m in matches])
        keep = self.nms(np.array(corners), scores, iou_thresh=0.3)
        return [matches[i] for i in keep]

    @staticmethod
    def nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh=0.3) -> List[int]:
        """Greedy non-maximum suppression.

        Args:
            boxes: ``N x 4`` array of ``(x1, y1, x2, y2)`` corners.
            scores: ``N`` scores, one per box.
            iou_thresh: Boxes whose IoU with an already kept box exceeds
                this value are discarded.

        Returns:
            Indices of the kept boxes as plain ``int``s, highest score
            first.  Empty input yields an empty list.
        """
        boxes = np.asarray(boxes)
        scores = np.asarray(scores)
        if boxes.size == 0:
            return []

        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        # The +1 treats corners as inclusive pixel coordinates.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep: List[int] = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            keep.append(int(best))

            # Intersection of the best box with every remaining candidate.
            ix1 = np.maximum(x1[best], x1[rest])
            iy1 = np.maximum(y1[best], y1[rest])
            ix2 = np.minimum(x2[best], x2[rest])
            iy2 = np.minimum(y2[best], y2[rest])
            inter = np.maximum(0.0, ix2 - ix1 + 1) * np.maximum(0.0, iy2 - iy1 + 1)

            iou = inter / (areas[best] + areas[rest] - inter)
            order = rest[iou <= iou_thresh]
        return keep

    def ocr_find_text(self, keywords: List[str], region=None) -> List["Match"]:
        """Find keywords on screen using Tesseract OCR.

        The screenshot is converted to grayscale and binarised with Otsu's
        threshold before recognition (languages ``eng+chi_sim``).

        Matching is case-insensitive substring matching.  A keyword is
        looked up inside each recognised word and, when it contains
        whitespace or non-ASCII characters, also across neighbouring words
        of the same text line, so that phrases such as "Sign in" (which
        Tesseract reports as two words) can be found.

        Args:
            keywords: Texts to look for.  Blank keywords are ignored.
            region: Optional ``(left, top, width, height)`` capture area.

        Returns:
            Matches in reading order of the OCR output, each with the fixed
            score ``0.9``.  A phrase spanning several words is reported
            with the bounding box enclosing all of them.
        """
        img = self.screen.grab(region)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        data = pytesseract.image_to_data(binary, lang='eng+chi_sim',
                                         output_type=pytesseract.Output.DICT)
        return self._match_keywords(data, keywords)

    @staticmethod
    def _match_keywords(data: dict, keywords: List[str]) -> List["Match"]:
        """Turn Tesseract ``image_to_data`` output into keyword matches."""
        # Lower-case and collapse whitespace so "Sign  in" == "sign in".
        needles = [" ".join(kw.lower().split()) for kw in keywords]
        needles = [n for n in needles if n]

        lines = {}   # (page, block, paragraph, line) -> [(index, word), ...]
        hits = []    # (index of first word, Match)
        for i, raw in enumerate(data['text']):
            word = str(raw).strip().lower() if raw else ""
            if not word:
                continue
            key = (data['page_num'][i], data['block_num'][i],
                   data['par_num'][i], data['line_num'][i])
            lines.setdefault(key, []).append((i, word))
            # One match per word, even if several keywords occur in it.
            if any(n in word for n in needles):
                hits.append((i, Match(Locator._union_box(data, (i,)),
                                      Locator._OCR_SCORE)))

        seen = set()
        for words in lines.values():
            if len(words) < 2:
                continue
            for needle in needles:
                if " " in needle:
                    sep = " "
                elif not needle.isascii():
                    # Scripts written without spaces (e.g. Chinese) are often
                    # split into one entry per character by Tesseract.
                    sep = ""
                else:
                    # Plain single-token keyword: word-level pass covers it.
                    continue
                for span in Locator._phrase_spans(words, needle, sep):
                    if span in seen:
                        continue
                    seen.add(span)
                    hits.append((span[0], Match(Locator._union_box(data, span),
                                                Locator._OCR_SCORE)))

        # Stable sort keeps word hits ahead of phrase hits at the same index.
        hits.sort(key=lambda hit: hit[0])
        return [match for _, match in hits]

    @staticmethod
    def _phrase_spans(words, needle: str, sep: str):
        """Return word-index tuples for each multi-word occurrence of ``needle``."""
        joined = sep.join(text for _, text in words)
        bounds = []
        pos = 0
        for idx, text in words:
            bounds.append((pos, pos + len(text), idx))
            pos += len(text) + len(sep)

        spans = []
        start = joined.find(needle)
        while start != -1:
            end = start + len(needle)
            covered = tuple(idx for lo, hi, idx in bounds if lo < end and hi > start)
            # Occurrences inside a single word are handled by the word pass.
            if len(covered) > 1:
                spans.append(covered)
            start = joined.find(needle, start + 1)
        return spans

    @staticmethod
    def _union_box(data: dict, indices) -> Tuple[int, int, int, int]:
        """Return the ``(x, y, w, h)`` box enclosing the given OCR entries."""
        left = min(data['left'][i] for i in indices)
        top = min(data['top'][i] for i in indices)
        right = max(data['left'][i] + data['width'][i] for i in indices)
        bottom = max(data['top'][i] + data['height'][i] for i in indices)
        return (left, top, right - left, bottom - top)
class Actor:
    """Mouse and keyboard actions performed through ``pyautogui``."""

    @staticmethod
    def click_center(box: Tuple[int, int, int, int]):
        """Move the pointer to the middle of ``box`` and click once.

        ``box`` is ``(x, y, width, height)``.  The pointer travels for
        0.05 s, and the call sleeps 0.2 s after the click.
        """
        left, top, width, height = box
        center = (left + width // 2, top + height // 2)
        pyautogui.moveTo(*center, duration=0.05)
        pyautogui.click()
        time.sleep(0.2)

    @staticmethod
    def type_text(text: str):
        """Type ``text`` key by key with a 0.02 s gap between keystrokes."""
        pyautogui.typewrite(text, interval=0.02)
def login_flow(template_login_btn: np.ndarray,
               username: str = "user@example.com",
               password: str = "P@ssw0rd!") -> None:
    """Click the login entry on screen and submit credentials.

    The login entry is located by template matching first (threshold
    0.85); if that finds nothing, OCR is used to look for the texts
    "登录", "Sign in" or "Log in".  The first hit is clicked, then the
    user name is typed, Tab is pressed, the password is typed and Enter
    is pressed.

    NOTE(review): the typing sequence assumes the click leaves keyboard
    focus in the user-name field -- confirm against the target UI.

    Args:
        template_login_btn: Image of the login button to search for.
        username: Text typed into the first field.
        password: Text typed after pressing Tab.
            NOTE(review): the defaults are the placeholder credentials that
            used to be hard-coded here; real callers should pass their own
            and avoid keeping secrets in source.

    Raises:
        RuntimeError: If neither template matching nor OCR finds the
            login entry.
    """
    screen = Screen()
    try:
        loc = Locator(screen)
        hits = loc.match_template(template_login_btn, threshold=0.85)
        if not hits:
            # Fall back to text recognition when the button image is absent.
            hits = loc.ocr_find_text(["登录", "Sign in", "Log in"])
        if not hits:
            # Message reads "login entry not found"; kept verbatim for callers.
            raise RuntimeError("找不到登录入口")
    finally:
        # Screenshots are no longer needed; release the mss session.
        screen.sct.close()

    Actor.click_center(hits[0].box)
    Actor.type_text(username)
    pyautogui.press('tab')
    Actor.type_text(password)
    pyautogui.press('enter')
|