"""Gradio app that OCRs on-screen text in a video, translates it to Turkish,
and burns the translation into the frames as a subtitle line."""

import logging
import os

import cv2
import gradio as gr
import numpy as np
from deep_translator import GoogleTranslator
from langdetect import detect
from paddleocr import PaddleOCR
from PIL import ImageFont, ImageDraw, Image

logger = logging.getLogger(__name__)

# Single shared OCR engine; building it is expensive, so do it once at import.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

OUTPUT_PATH = "output_with_subs.mp4"
OCR_FRAME_INTERVAL = 30      # run OCR + translation on every 30th frame
MIN_DETECT_CHARS = 10        # shorter snippets are assumed to be English
MAX_SUBTITLE_CHARS = 120     # only the tail of the running subtitle is drawn
SUBTITLE_FONT_SIZE = 32
SUBTITLE_COLOR = (0, 255, 0)
FALLBACK_FPS = 30.0          # used when the container reports no usable FPS
FONT_CANDIDATES = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
)


def _load_subtitle_font():
    """Return the first usable candidate font, or Pillow's default font."""
    for path in FONT_CANDIDATES:
        if not os.path.exists(path):
            continue
        try:
            return ImageFont.truetype(path, SUBTITLE_FONT_SIZE)
        except OSError:
            logger.warning("Could not load font %s", path)
    # NOTE(review): the default font's glyph coverage may be limited for
    # Turkish characters; install one of FONT_CANDIDATES for best results.
    return ImageFont.load_default()


def _extract_text(frame_bgr):
    """Run OCR on a BGR frame and return the recognised words joined by spaces."""
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    result = ocr.ocr(rgb, cls=True)
    words = []
    for line in result or []:
        # The per-image entry can be None/empty when no text was detected.
        if not line:
            continue
        for word in line:
            words.append(word[1][0])
    return " ".join(words).strip()


def _detect_language(text):
    """Return a language code for *text*, or None when detection fails."""
    if len(text) < MIN_DETECT_CHARS:
        # Too little signal for langdetect; keep the original 'en' assumption.
        return 'en'
    try:
        return detect(text)
    except Exception:
        logger.warning("Language detection failed for %r", text, exc_info=True)
        return None


def _translate_to_turkish(text, source_lang):
    """Translate *text* to Turkish; return None on failure or empty result."""
    try:
        translated = GoogleTranslator(source=source_lang, target='tr').translate(text)
    except Exception:
        # Best effort: a failed translation must not abort the whole video.
        logger.warning("Translation failed for %r", text, exc_info=True)
        return None
    return translated or None


def _draw_subtitle(frame_bgr, subtitle, font, height):
    """Return a copy of the BGR frame with *subtitle* drawn near the bottom."""
    frame_pil = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(frame_pil)
    draw.text((30, height - 60), subtitle, font=font, fill=SUBTITLE_COLOR)
    return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)


def process_video(video_file):
    """Burn translated on-screen text into a video as a running subtitle.

    Every ``OCR_FRAME_INTERVAL``-th frame is OCR'd; any recognised text is
    translated to Turkish and appended to a running subtitle whose last
    ``MAX_SUBTITLE_CHARS`` characters are drawn on every subsequent frame.

    Args:
        video_file: Path of the input video, as supplied by the Gradio input.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is None
        when the input could not be opened or the writer could not start.
    """
    if not video_file:
        return "Video could not be opened!", None

    cap = cv2.VideoCapture(video_file)
    out = None
    try:
        if not cap.isOpened():
            return "Video could not be opened!", None

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):  # also catches NaN
            fps = FALLBACK_FPS

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(OUTPUT_PATH, fourcc, fps, (width, height))
        if not out.isOpened():
            return "VideoWriter could not be started!", None

        font = _load_subtitle_font()  # loaded once, not once per frame
        subtitle = ""
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            if frame_count % OCR_FRAME_INTERVAL == 0:
                text = _extract_text(frame)
                source_lang = _detect_language(text) if text else None
                translated = (
                    _translate_to_turkish(text, source_lang) if source_lang else None
                )
                if translated:
                    # Only the tail is ever displayed, so keep just that much.
                    joined = f"{subtitle} {translated}" if subtitle else translated
                    subtitle = joined[-MAX_SUBTITLE_CHARS:]

            if subtitle:
                frame = _draw_subtitle(frame, subtitle, font, height)

            if frame.shape[1] != width or frame.shape[0] != height:
                frame = cv2.resize(frame, (width, height))
            # Every decoded frame is written, even when OCR/detection failed,
            # so the output keeps the input's length and timing.
            out.write(frame)
    finally:
        # Release handles on every exit path (early return or exception).
        # No GUI windows are created, so no destroyAllWindows() is needed.
        cap.release()
        if out is not None:
            out.release()

    return "Success!", OUTPUT_PATH


# gr.Video hands the callback a file path; it takes no `type` argument.
demo = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload a video"),
    outputs=[
        gr.Textbox(label="Status"),
        gr.File(label="Download video with subtitles"),
    ],
    title="Video OCR & Translation Subtitle Generator",
    description="Upload a video. The app will extract text, translate it to Turkish, and add it as subtitles.",
)

if __name__ == "__main__":
    demo.launch()