File size: 3,450 Bytes

4da1734

import math
from PIL import Image, ImageDraw


def draw_arrow_refined(draw, start, end, color=(255, 0, 0, 255), width=15, arrow_len=50):
    """Draw a straight arrow from ``start`` to ``end`` using ``draw``.

    The arrow consists of a line (the shaft) and a filled triangular head
    whose tip lies exactly on ``end``. Nothing is drawn when ``start`` and
    ``end`` (almost) coincide, because the direction is then undefined.

    Args:
        draw: Drawing object exposing ``line(points, fill=..., width=...)``
            and ``polygon(points, fill=...)``.
        start: ``(x, y)`` position of the arrow tail.
        end: ``(x, y)`` position of the arrow tip.
        color: Fill value passed to both the shaft and the head.
        width: Shaft thickness passed to ``draw.line``.
        arrow_len: Length of each slanted edge of the head, measured from
            the tip.
    """
    x1, y1 = start
    x2, y2 = end
    dx, dy = x2 - x1, y2 - y1

    length = math.hypot(dx, dy)
    if length < 1e-5:
        return

    angle = math.atan2(dy, dx)
    half_spread = math.pi / 6  # each head edge is 30 degrees off the shaft axis

    # Back corners of the head: step back from the tip along the two edges.
    p1_x = x2 - arrow_len * math.cos(angle - half_spread)
    p1_y = y2 - arrow_len * math.sin(angle - half_spread)
    p2_x = x2 - arrow_len * math.cos(angle + half_spread)
    p2_y = y2 - arrow_len * math.sin(angle + half_spread)

    # Stop the shaft inside the head so its square end does not poke past the
    # tip. Clamp to the arrow length: for arrows shorter than the back-off the
    # unclamped end point would fall behind ``start`` and produce a stray
    # segment pointing the wrong way.
    back_off = min(arrow_len * 0.8, length)
    if back_off < length:
        line_end_x = x2 - (back_off / length) * dx
        line_end_y = y2 - (back_off / length) * dy
        draw.line([start, (line_end_x, line_end_y)], fill=color, width=width)

    draw.polygon([(x2, y2), (p1_x, p1_y), (p2_x, p2_y)], fill=color)


def build_visual_hint(image, action):
    """Return a copy of ``image`` with a visual hint for ``action`` drawn on it.

    Hints by ``action_type``:
    - ``click`` / ``long_press`` / ``input_text`` / ``open_app``: a
      semi-transparent red circle centred on ``(x, y)`` (radius 30 for
      ``input_text``, 40 otherwise). Nothing is drawn when ``x`` or ``y`` is
      missing.
    - ``scroll`` / ``swipe``: a red arrow from ``(x1, y1)`` to ``(x2, y2)``.
      When any of these coordinates is missing, a 300-pixel arrow centred on
      the image is drawn instead, oriented by ``direction``.
    - any other value: no hint is drawn.

    Supported action formats, by example:

    1) click:
        {
            "action_type": "click",
            "x": 540,
            "y": 1470
        }

    2) scroll:
        {
            "action_type": "scroll",
            "direction": "down",
            "x1": 540,
            "y1": 1600,
            "x2": 540,
            "y2": 900
        }

    Args:
        image: PIL image to annotate.
        action: Mapping describing the action, as in the examples above.

    Returns:
        A new RGB image with the hint composited over the input.
    """
    image = image.convert("RGBA")
    # Draw on a fully transparent layer so the semi-transparent circle fill
    # blends with the underlying pixels when composited.
    overlay = Image.new("RGBA", image.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)

    width, height = image.size
    action_type = action.get("action_type", "")

    fill_color = (255, 0, 0, 100)
    outline_color = (255, 0, 0, 255)

    if action_type in ("scroll", "swipe"):
        coords = [action.get(key) for key in ("x1", "y1", "x2", "y2")]

        if None not in coords:
            x1, y1, x2, y2 = (int(value) for value in coords)
            start_point, end_point = (x1, y1), (x2, y2)
        else:
            # The direction only matters for this fallback. A missing, null or
            # non-string value is treated as "down" instead of raising.
            raw_direction = action.get("direction")
            direction = raw_direction.lower() if isinstance(raw_direction, str) else "down"

            cx, cy = width // 2, height // 2
            half = 300 // 2

            # Offset of the arrow tail from the image centre; the tip is the
            # mirror image. The arrow points opposite to the named direction
            # (e.g. "down" draws an arrow pointing up the screen).
            tail_offsets = {
                "down": (0, half),
                "up": (0, -half),
                "right": (half, 0),
                "left": (-half, 0),
            }
            off_x, off_y = tail_offsets.get(direction, tail_offsets["down"])
            start_point = (cx + off_x, cy + off_y)
            end_point = (cx - off_x, cy - off_y)

        draw_arrow_refined(
            draw,
            start_point,
            end_point,
            color=outline_color,
            width=15,
            arrow_len=50,
        )

    elif action_type in ("click", "long_press", "input_text", "open_app"):
        x = action.get("x")
        y = action.get("y")

        if x is not None and y is not None:
            x = int(x)
            y = int(y)
            radius = 30 if action_type == "input_text" else 40

            draw.ellipse(
                (x - radius, y - radius, x + radius, y + radius),
                fill=fill_color,
                outline=outline_color,
                width=5,
            )

    return Image.alpha_composite(image, overlay).convert("RGB")