# mguven61's picture
# Upload 2 files
# a1827e4 verified
# raw
# history blame
# 3.37 kB
import gradio as gr
import cv2
from paddleocr import PaddleOCR
from langdetect import detect
from googletrans import Translator
from PIL import ImageFont, ImageDraw, Image
import numpy as np
import os
# Module-level singletons shared by every call to process_video:
# one PaddleOCR engine (English, angle classifier enabled) and one
# googletrans Translator client.
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
)
translator = Translator()
# Candidate subtitle fonts, tried in order.
_SUBTITLE_FONT_PATHS = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
)
_SUBTITLE_FONT_SIZE = 32
_SUBTITLE_MAX_CHARS = 120   # only the tail of the running translation is shown
_OCR_FRAME_INTERVAL = 30    # OCR runs on every 30th frame
_FALLBACK_FPS = 30.0        # used when the container reports no usable frame rate


def _load_subtitle_font():
    """Return the first usable TrueType subtitle font, else Pillow's built-in font."""
    for font_path in _SUBTITLE_FONT_PATHS:
        if not os.path.exists(font_path):
            continue
        try:
            return ImageFont.truetype(font_path, _SUBTITLE_FONT_SIZE)
        except OSError:
            # Present but unreadable: try the next candidate.
            continue
    # Neither font is installed; degrade to the default font instead of crashing.
    return ImageFont.load_default()


def _ocr_text(frame):
    """Run OCR on a BGR frame and return the recognised words joined by spaces."""
    result = ocr.ocr(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), cls=True)
    words = []
    for line in result or []:
        # PaddleOCR can report an image without any detected text as None
        # instead of an empty list, so skip falsy entries.
        if not line:
            continue
        words.extend(word[1][0] for word in line)
    return ' '.join(words).strip()


def _translate_to_turkish(text):
    """Translate ``text`` to Turkish; return None if detection or translation fails."""
    try:
        # Very short snippets are treated as English rather than being
        # passed to the language detector.
        source_lang = 'en' if len(text) < 10 else detect(text)
        return translator.translate(text, src=source_lang, dest='tr').text
    except Exception:
        # Deliberate best effort: a failed detection or translation request
        # must not abort the whole video, so this sample is simply skipped.
        return None


def _draw_subtitle(frame, subtitle, font, height):
    """Return a copy of the BGR ``frame`` with ``subtitle`` drawn near the bottom."""
    frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    ImageDraw.Draw(frame_pil).text(
        (30, height - 60), subtitle, font=font, fill=(0, 255, 0)
    )
    return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)


def process_video(video_file):
    """Burn Turkish translations of on-screen text into a copy of a video.

    Every 30th frame is passed through OCR; any text found is translated to
    Turkish and appended to a running subtitle. The last 120 characters of
    that subtitle are drawn on every subsequent frame, and all frames are
    written to ``output_with_subs.mp4`` in the working directory.

    Args:
        video_file: Path of the input video file.

    Returns:
        A ``(status, path)`` tuple: ``("Success!", output_path)`` on success,
        or an error message and ``None`` when the input cannot be opened or
        the output writer cannot be started.
    """
    output_path = "output_with_subs.mp4"

    # Nothing was supplied (e.g. the form was submitted without a video).
    if not video_file:
        return "Video could not be opened!", None

    cap = cv2.VideoCapture(video_file)
    out = None
    try:
        if not cap.isOpened():
            return "Video could not be opened!", None

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` (rather than `fps <= 0`) also catches NaN.
        if not fps > 0:
            fps = _FALLBACK_FPS

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            return "VideoWriter could not be started!", None

        # Loaded once: the font does not change between frames.
        font = _load_subtitle_font()
        all_translations = []
        subtitle = ''
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            if frame_count % _OCR_FRAME_INTERVAL == 0:
                text = _ocr_text(frame)
                if text:
                    translated = _translate_to_turkish(text)
                    if translated is not None:
                        all_translations.append(translated)
                        # The subtitle only changes here, so rebuild it here
                        # instead of on every frame.
                        subtitle = ' '.join(all_translations)[-_SUBTITLE_MAX_CHARS:]

            if subtitle:
                frame = _draw_subtitle(frame, subtitle, font, height)

            if frame.shape[1] != width or frame.shape[0] != height:
                frame = cv2.resize(frame, (width, height))
            # Every decoded frame is written, even when OCR or translation
            # failed for it, so the output keeps the input's length.
            out.write(frame)
    finally:
        # Release the handles on every exit path. No HighGUI window is ever
        # created, so there is nothing for cv2.destroyAllWindows() to close
        # (and that call is unavailable in headless OpenCV builds).
        cap.release()
        if out is not None:
            out.release()

    return "Success!", output_path
# Gradio front end: one video input, and two outputs matching the
# (status message, output file path) tuple returned by process_video.
demo = gr.Interface(
    fn=process_video,
    # gr.Video hands the callback a file path by itself and has no `type`
    # option; passing one is rejected by current Gradio releases.
    inputs=gr.Video(label="Upload a video"),
    outputs=[
        gr.Textbox(label="Status"),
        gr.File(label="Download video with subtitles"),
    ],
    title="Video OCR & Translation Subtitle Generator",
    description="Upload a video. The app will extract text, translate it to Turkish, and add it as subtitles.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()