Spaces:

EaindraKyaw
/

Object_Detection

Sleeping

App Files Files Community

EaindraKyaw committed on Jan 10

Commit

9f166d9

verified ·

1 Parent(s): 170b9ef

Create app.py

Browse files

Files changed (1) hide show

app.py +125 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+!apt-get install espeak
+import io
+import matplotlib.pyplot as plt
+import requests
+import inflect
+from PIL import Image
+def load_image_from_url(url):
+    return Image.open(requests.get(url, stream=True).raw)
+def render_results_in_image(in_pil_img, in_results):
+    plt.figure(figsize=(16, 10))
+    plt.imshow(in_pil_img)
+    ax = plt.gca()
+    for prediction in in_results:
+        x, y = prediction['box']['xmin'], prediction['box']['ymin']
+        w = prediction['box']['xmax'] - prediction['box']['xmin']
+        h = prediction['box']['ymax'] - prediction['box']['ymin']
+        ax.add_patch(plt.Rectangle((x, y),
+                                   w,
+                                   h,
+                                   fill=False,
+                                   color="green",
+                                   linewidth=2))
+        ax.text(
+           x,
+           y,
+           f"{prediction['label']}: {round(prediction['score']*100, 1)}%",
+           color='red'
+        )
+    plt.axis("off")
+    # Save the modified image to a BytesIO object
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, format='png',
+                bbox_inches='tight',
+                pad_inches=0)
+    img_buf.seek(0)
+    modified_image = Image.open(img_buf)
+    # Close the plot to prevent it from being displayed
+    plt.close()
+    return modified_image
+def summarize_predictions_natural_language(predictions):
+    summary = {}
+    p = inflect.engine()
+    for prediction in predictions:
+        label = prediction['label']
+        if label in summary:
+            summary[label] += 1
+        else:
+            summary[label] = 1
+    result_string = "In this image, there are "
+    for i, (label, count) in enumerate(summary.items()):
+        count_string = p.number_to_words(count)
+        result_string += f"{count_string} {label}"
+        if count > 1:
+          result_string += "s"
+        result_string += " "
+        if i == len(summary) - 2:
+          result_string += "and "
+    # Remove the trailing comma and space
+    result_string = result_string.rstrip(', ') + "."
+    return result_string
+##### To ignore warnings #####
+import warnings
+import logging
+from transformers import logging as hf_logging
+def ignore_warnings():
+    # Ignore specific Python warnings
+    warnings.filterwarnings("ignore", message="Some weights of the model checkpoint")
+    warnings.filterwarnings("ignore", message="Could not find image processor class")
+    warnings.filterwarnings("ignore", message="The `max_size` parameter is deprecated")
+    # Adjust logging for libraries using the logging module
+    logging.basicConfig(level=logging.ERROR)
+    hf_logging.set_verbosity_error()
+########
+from transformers import pipeline
+from PIL import Image
+from IPython.display import Audio as IPythonAudio
+import gradio as gr
+import numpy as np
+import io
+import soundfile as sf
+def processed_image(image):
+    # The uploaded image is a PIL image
+    od_pipe= pipeline("object-detection", model="facebook/detr-resnet-50")
+    pl_out = od_pipe(image)
+    processed_image=render_results_in_image(image,pl_out)
+    text=summarize_predictions_natural_language(pl_out)
+    return processed_image,text
+iface = gr.Interface(processed_image,  # Function to process the image
+    inputs=gr.Image(type="pil"),  # Image upload input
+    outputs=[gr.Image(type="pil"),"text"]  # Image output
+)
+iface.launch()
+tts_pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-vctk")
+narrated_text=tts_pipe(text)
+from IPython.display import Audio as IPythonAudio
+IPythonAudio(narrated_text["audio"][0],
+             rate=narrated_text["sampling_rate"])