mguven61's picture
Update app.py
aa38b0a verified
import logging
import os

import gradio as gr
import cv2
from paddleocr import PaddleOCR
from langdetect import detect
from deep_translator import GoogleTranslator
from PIL import ImageFont, ImageDraw, Image
import numpy as np
# Shared OCR engine: built once when the module is imported and reused by
# process_video for every sampled frame. Configured for English text with
# PaddleOCR's angle classifier enabled.
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
)
_logger = logging.getLogger(__name__)

# OCR and translation run on every 30th frame only; all other frames reuse
# the subtitle text gathered so far.
_OCR_FRAME_INTERVAL = 30
# Only the most recent characters of the accumulated translation are drawn.
_MAX_SUBTITLE_CHARS = 120
# Used when the capture backend reports no usable frame rate (0 or NaN).
_FALLBACK_FPS = 25.0
_SUBTITLE_FONT_SIZE = 32
_SUBTITLE_FONT_CANDIDATES = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
)


def _collect_ocr_text(ocr_result):
    """Join every recognised text fragment of a PaddleOCR result into one string."""
    fragments = []
    for page in ocr_result or []:
        # NOTE(review): some PaddleOCR releases return [None] for an image
        # without any detected text; skip such pages instead of iterating None.
        if not page:
            continue
        for detection in page:
            # Each detection is indexed as [box, (text, confidence)].
            fragments.append(detection[1][0])
    return ' '.join(fragments)


def _translate_to_turkish(text):
    """Translate *text* to Turkish; return None if detection or translation fails."""
    try:
        # Strings shorter than 10 characters are assumed to be English rather
        # than passed to the language detector.
        source_lang = 'en' if len(text) < 10 else detect(text)
    except Exception:
        _logger.warning("Language detection failed; sample skipped", exc_info=True)
        return None
    try:
        return GoogleTranslator(source=source_lang, target='tr').translate(text)
    except Exception:
        # Best effort: a failed translation must not abort the whole video.
        _logger.warning("Translation failed; sample skipped", exc_info=True)
        return None


def _load_subtitle_font():
    """Return the first usable TrueType font, or Pillow's built-in default font."""
    for candidate in _SUBTITLE_FONT_CANDIDATES:
        if os.path.exists(candidate):
            try:
                return ImageFont.truetype(candidate, _SUBTITLE_FONT_SIZE)
            except OSError:
                continue
    # Neither system font is installed; fall back instead of crashing.
    return ImageFont.load_default()


def _draw_subtitle(frame, subtitle, font, height):
    """Return a copy of the BGR *frame* with *subtitle* drawn near the bottom edge."""
    frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    ImageDraw.Draw(frame_pil).text(
        (30, height - 60), subtitle, font=font, fill=(0, 255, 0)
    )
    return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)


def process_video(video_file):
    """Burn Turkish translations of on-screen text into a copy of a video.

    Every 30th frame is passed through OCR; any recognised text is translated
    to Turkish and appended to a running subtitle. The last 120 characters of
    that subtitle are drawn on every written frame.

    Args:
        video_file: Path of the input video. ``None`` or an empty string
            (no upload) is reported as an unopenable video.

    Returns:
        A ``(status, path)`` tuple. ``path`` is ``"output_with_subs.mp4"`` on
        success and ``None`` when the input or the output could not be opened.
    """
    if not video_file:
        return "Video could not be opened!", None

    output_path = "output_with_subs.mp4"
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        cap.release()
        return "Video could not be opened!", None

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # also true for NaN
            fps = _FALLBACK_FPS

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            return "VideoWriter could not be started!", None

        font = _load_subtitle_font()  # loaded once, not once per frame
        subtitle = ''
        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            if frame_count % _OCR_FRAME_INTERVAL == 0:
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                text = _collect_ocr_text(ocr.ocr(rgb, cls=True)).strip()
                translated = _translate_to_turkish(text) if text else None
                if translated:
                    # Keeping only the tail is equivalent to joining the whole
                    # history and slicing, without the unbounded growth.
                    subtitle = f"{subtitle} {translated}" if subtitle else translated
                    subtitle = subtitle[-_MAX_SUBTITLE_CHARS:]

            if subtitle:
                frame = _draw_subtitle(frame, subtitle, font, height)
            if frame.shape[1] != width or frame.shape[0] != height:
                frame = cv2.resize(frame, (width, height))
            # Every decoded frame is written, even when OCR or translation
            # failed, so the output keeps the input's length.
            out.write(frame)
    finally:
        # Release both handles on every exit path, including early returns.
        # No GUI window is opened here, so cv2.destroyAllWindows() is not
        # called (NOTE(review): it is reported to raise on headless builds).
        cap.release()
        if out is not None:
            out.release()

    return "Success!", output_path
# Gradio UI: one uploaded video in; a status message and the subtitled file out.
demo = gr.Interface(
    fn=process_video,
    # gr.Video has no `type` parameter. NOTE(review): Gradio 4+ is expected to
    # raise TypeError on the unknown keyword; confirm against the pinned version.
    # The component already passes the upload to the callback as a file path.
    inputs=gr.Video(label="Upload a video"),
    outputs=[
        gr.Textbox(label="Status"),
        gr.File(label="Download video with subtitles"),
    ],
    title="Video OCR & Translation Subtitle Generator",
    description="Upload a video. The app will extract text, translate it to Turkish, and add it as subtitles.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()