Spaces:

mguven61
/

Pose-Detection-json

Running

App Files Community

mguven61 committed 13 days ago

Commit

1dc7b14

verified ·

1 Parent(s): db980df

Upload 6 files

Browse files

Files changed (6) hide show

LICENSE +21 -0
app.py +173 -0
pyproject.toml +59 -0
requirements.txt +91 -0
style.css +11 -0
uv.lock +0 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 hysts
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+#!/usr/bin/env python
+import pathlib
+import tempfile
+import cv2
+import gradio as gr
+import numpy as np
+import PIL.Image
+import spaces
+import supervision as sv
+import torch
+import tqdm
+from transformers import AutoProcessor, RTDetrForObjectDetection, VitPoseForPoseEstimation
+# Markdown heading rendered at the top of the Gradio page.
+DESCRIPTION = "# ViTPose"
+# Upper bound on the number of frames process_video() reads from an input video.
+MAX_NUM_FRAMES = 300
+# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Stage 1: object detector used to find person bounding boxes.
+# Both the processor and the model are loaded once, at import time.
+person_detector_name = "PekingU/rtdetr_r50vd_coco_o365"
+person_image_processor = AutoProcessor.from_pretrained(person_detector_name)
+person_model = RTDetrForObjectDetection.from_pretrained(person_detector_name, device_map=device)
+# Stage 2: pose estimator that predicts keypoints inside each detected box.
+pose_model_name = "usyd-community/vitpose-base-simple"
+pose_image_processor = AutoProcessor.from_pretrained(pose_model_name)
+pose_model = VitPoseForPoseEstimation.from_pretrained(pose_model_name, device_map=device)
+@spaces.GPU(duration=5)
+@torch.inference_mode()
+def process_image(image: PIL.Image.Image) -> tuple[PIL.Image.Image, list[dict]]:
+    inputs = person_image_processor(images=image, return_tensors="pt").to(device)
+    outputs = person_model(**inputs)
+    results = person_image_processor.post_process_object_detection(
+        outputs, target_sizes=torch.tensor([(image.height, image.width)]), threshold=0.3
+    )
+    result = results[0]
+    person_boxes_xyxy = result["boxes"][result["labels"] == 0]
+    person_boxes_xyxy = person_boxes_xyxy.cpu().numpy()
+    person_boxes = person_boxes_xyxy.copy()
+    person_boxes[:, 2] = person_boxes[:, 2] - person_boxes[:, 0]
+    person_boxes[:, 3] = person_boxes[:, 3] - person_boxes[:, 1]
+    inputs = pose_image_processor(image, boxes=[person_boxes], return_tensors="pt").to(device)
+    if pose_model.config.backbone_config.num_experts > 1:
+        dataset_index = torch.tensor([0] * len(inputs["pixel_values"]))
+        dataset_index = dataset_index.to(inputs["pixel_values"].device)
+        inputs["dataset_index"] = dataset_index
+    outputs = pose_model(**inputs)
+    pose_results = pose_image_processor.post_process_pose_estimation(outputs, boxes=[person_boxes])
+    image_pose_result = pose_results[0]
+    human_readable_results = []
+    for i, person_pose in enumerate(image_pose_result):
+        data = {
+            "person_id": i,
+            "bbox": person_pose["bbox"].numpy().tolist(),
+            "keypoints": [],
+        }
+        for keypoint, label, score in zip(
+            person_pose["keypoints"], person_pose["labels"], person_pose["scores"], strict=True
+        ):
+            keypoint_name = pose_model.config.id2label[label.item()]
+            x, y = keypoint
+            data["keypoints"].append({"name": keypoint_name, "x": x.item(), "y": y.item(), "score": score.item()})
+        human_readable_results.append(data)
+    xy = [pose_result["keypoints"] for pose_result in image_pose_result]
+    xy = torch.stack(xy).cpu().numpy()
+    scores = [pose_result["scores"] for pose_result in image_pose_result]
+    scores = torch.stack(scores).cpu().numpy()
+    keypoints = sv.KeyPoints(xy=xy, confidence=scores)
+    detections = sv.Detections(xyxy=person_boxes_xyxy)
+    edge_annotator = sv.EdgeAnnotator(color=sv.Color.GREEN, thickness=1)
+    vertex_annotator = sv.VertexAnnotator(color=sv.Color.RED, radius=2)
+    bounding_box_annotator = sv.BoxAnnotator(color=sv.Color.WHITE, color_lookup=sv.ColorLookup.INDEX, thickness=1)
+    annotated_frame = image.copy()
+    annotated_frame = bounding_box_annotator.annotate(scene=image.copy(), detections=detections)
+    annotated_frame = edge_annotator.annotate(scene=annotated_frame, key_points=keypoints)
+    return vertex_annotator.annotate(scene=annotated_frame, key_points=keypoints), human_readable_results
+@spaces.GPU(duration=90)
+def process_video(
+    video_path: str,
+    progress: gr.Progress = gr.Progress(track_tqdm=True),
+) -> str:
+    cap = cv2.VideoCapture(video_path)
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out_file:
+        writer = cv2.VideoWriter(out_file.name, fourcc, fps, (width, height))
+        for _ in tqdm.auto.tqdm(range(min(MAX_NUM_FRAMES, num_frames))):
+            ok, frame = cap.read()
+            if not ok:
+                break
+            rgb_frame = frame[:, :, ::-1]
+            annotated_frame, _ = process_image(PIL.Image.fromarray(rgb_frame))
+            writer.write(np.asarray(annotated_frame)[:, :, ::-1])
+        writer.release()
+    cap.release()
+    return out_file.name
+# Gradio UI: one tab for single images and one for videos, each with an input
+# column, an output column, an examples gallery and a run button.
+with gr.Blocks(css_paths="style.css") as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Tabs():
+        # --- Image tab: returns the annotated image plus the per-person JSON. ---
+        with gr.Tab("Image"):
+            with gr.Row():
+                with gr.Column():
+                    # type="pil" so process_image receives a PIL.Image.Image.
+                    input_image = gr.Image(label="Input Image", type="pil")
+                    run_button_image = gr.Button()
+                with gr.Column():
+                    output_image = gr.Image(label="Output Image")
+                    output_json = gr.JSON(label="Output JSON")
+            # Example images are discovered from the local "images" directory.
+            # NOTE(review): the file list of this upload does not include an
+            # images/ directory; if it is absent the glob yields no examples.
+            gr.Examples(
+                examples=sorted(pathlib.Path("images").glob("*.jpg")),
+                inputs=input_image,
+                outputs=[output_image, output_json],
+                fn=process_image,
+            )
+            run_button_image.click(
+                fn=process_image,
+                inputs=input_image,
+                outputs=[output_image, output_json],
+            )
+        # --- Video tab: returns the path of the annotated video. ---
+        with gr.Tab("Video"):
+            gr.Markdown(f"The input video will be truncated to {MAX_NUM_FRAMES} frames.")
+            with gr.Row():
+                with gr.Column():
+                    input_video = gr.Video(label="Input Video")
+                    run_button_video = gr.Button()
+                with gr.Column():
+                    output_video = gr.Video(label="Output Video")
+            # Example videos are discovered from the local "videos" directory;
+            # caching is explicitly disabled for them.
+            # NOTE(review): as with images/, a videos/ directory is not part of
+            # this upload's file list.
+            gr.Examples(
+                examples=sorted(pathlib.Path("videos").glob("*.mp4")),
+                inputs=input_video,
+                outputs=output_video,
+                fn=process_video,
+                cache_examples=False,
+            )
+            run_button_video.click(
+                fn=process_video,
+                inputs=input_video,
+                outputs=output_video,
+            )
+# Start the app only when executed as a script; the request queue is created
+# with max_size=20.
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,59 @@

+[project]
+name = "pose-detect"
+version = "0.1.0"
+description = ""
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "accelerate>=1.3.0",
+    "gradio>=5.13.2",
+    "hf-transfer>=0.1.9",
+    "opencv-python-headless>=4.11.0.86",
+    "setuptools>=75.8.0",
+    "spaces>=0.32.0",
+    "supervision>=0.25.1",
+    "torch==2.4.0",
+    "transformers>=4.48.1",
+]
+[tool.ruff]
+line-length = 119
+[tool.ruff.lint]
+select = ["ALL"]
+ignore = [
+    "COM812", # missing-trailing-comma
+    "D203",   # one-blank-line-before-class
+    "D213",   # multi-line-summary-second-line
+    "E501",   # line-too-long
+    "SIM117", # multiple-with-statements
+]
+extend-ignore = [
+    "D100",    # undocumented-public-module
+    "D101",    # undocumented-public-class
+    "D102",    # undocumented-public-method
+    "D103",    # undocumented-public-function
+    "D104",    # undocumented-public-package
+    "D105",    # undocumented-magic-method
+    "D107",    # undocumented-public-init
+    "EM101",   # raw-string-in-exception
+    "FBT001",  # boolean-type-hint-positional-argument
+    "FBT002",  # boolean-default-value-positional-argument
+    "PD901",   # pandas-df-variable-name
+    "PGH003",  # blanket-type-ignore
+    "PLR0913", # too-many-arguments
+    "PLR0915", # too-many-statements
+    "TRY003",  # raise-vanilla-args
+]
+unfixable = [
+    "F401", # unused-import
+]
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+[tool.ruff.lint.per-file-ignores]
+"*.ipynb" = ["T201"]
+[tool.ruff.format]
+docstring-code-format = true

requirements.txt ADDED Viewed

	@@ -0,0 +1,91 @@

+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml -o requirements.txt
+accelerate==1.3.0
+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.8.0
+certifi==2024.12.14
+charset-normalizer==3.4.1
+click==8.1.8
+contourpy==1.3.1
+cycler==0.12.1
+defusedxml==0.7.1
+exceptiongroup==1.2.2
+fastapi==0.115.7
+ffmpy==0.5.0
+filelock==3.17.0
+fonttools==4.55.7
+fsspec==2024.12.0
+gradio==5.13.2
+gradio-client==1.6.0
+h11==0.14.0
+hf-transfer==0.1.9
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.28.0
+idna==3.10
+jinja2==3.1.5
+kiwisolver==1.4.8
+markdown-it-py==3.0.0
+markupsafe==2.1.5
+matplotlib==3.10.0
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.4.2
+numpy==2.2.2
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.1.105
+opencv-python==4.11.0.86
+opencv-python-headless==4.11.0.86
+orjson==3.10.15
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+psutil==5.9.8
+pydantic==2.10.6
+pydantic-core==2.27.2
+pydub==0.25.1
+pygments==2.19.1
+pyparsing==3.2.1
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2024.2
+pyyaml==6.0.2
+regex==2024.11.6
+requests==2.32.3
+rich==13.9.4
+ruff==0.9.3
+safehttpx==0.1.6
+safetensors==0.5.2
+scipy==1.15.1
+semantic-version==2.10.0
+setuptools==75.8.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+spaces==0.32.0
+starlette==0.45.3
+supervision==0.25.1
+sympy==1.13.3
+tokenizers==0.21.0
+tomlkit==0.13.2
+torch==2.4.0
+tqdm==4.67.1
+transformers==4.48.1
+triton==3.0.0
+typer==0.15.1
+typing-extensions==4.12.2
+tzdata==2025.1
+urllib3==2.3.0
+uvicorn==0.34.0
+websockets==14.2

style.css ADDED Viewed

	@@ -0,0 +1,11 @@

+/* Center top-level headings (presumably the page title rendered from the app's Markdown header). */
+h1 {
+  text-align: center;
+  display: block;
+}
+/* Pill-shaped blue button, centered horizontally.
+   NOTE(review): app.py in this commit does not create an element with the id
+   "duplicate-button", so this rule appears to be unused here. */
+#duplicate-button {
+  margin: auto;
+  color: #fff;
+  background: #1565c0;
+  border-radius: 100vh;
+}

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff