File size: 3,269 Bytes
aa38b0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1827e4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import logging
import os

import gradio as gr
import cv2
from paddleocr import PaddleOCR
from langdetect import detect
from deep_translator import GoogleTranslator
from PIL import ImageFont, ImageDraw, Image
import numpy as np

# Shared OCR engine: constructed once at import time and reused by
# process_video for every sampled frame. It is configured for English text
# with the angle classifier enabled (use_angle_cls=True).
ocr = PaddleOCR(use_angle_cls=True, lang='en')

_logger = logging.getLogger(__name__)

# Run OCR + translation on every N-th frame only; the steps are too slow to
# run on each frame.
_OCR_EVERY_N_FRAMES = 30
# Only the most recent characters of the accumulated translation are shown.
_MAX_SUBTITLE_CHARS = 120
_SUBTITLE_FONT_SIZE = 32
_SUBTITLE_COLOR = (0, 255, 0)
# Frame rate used when the container does not report a usable one.
_FALLBACK_FPS = 30.0
_PRIMARY_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
_FALLBACK_FONT_PATH = "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf"


def _load_subtitle_font():
    """Load the subtitle font, preferring DejaVu Sans over Liberation Sans.

    Raises:
        OSError: if the selected font file cannot be read.
    """
    font_path = _PRIMARY_FONT_PATH
    if not os.path.exists(font_path):
        font_path = _FALLBACK_FONT_PATH
    return ImageFont.truetype(font_path, _SUBTITLE_FONT_SIZE)


def _extract_text(frame_bgr):
    """Run OCR on a BGR frame and return the recognized words joined by spaces."""
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    result = ocr.ocr(rgb, cls=True)
    words = []
    for line in result or []:
        # Some PaddleOCR releases report "no text found" as None instead of
        # an empty list; iterating it would raise TypeError.
        if not line:
            continue
        words.extend(word[1][0] for word in line)
    return ' '.join(words)


def _translate_to_turkish(text):
    """Translate *text* to Turkish; return None when no translation is available.

    Failures are logged and reported as None so that one bad frame never
    aborts the whole video.
    """
    try:
        # Very short snippets are treated as English instead of being passed
        # to the language detector.
        if len(text.strip()) < 10:
            source_lang = 'en'
        else:
            source_lang = detect(text)
    except Exception:
        _logger.warning("Language detection failed; skipping translation", exc_info=True)
        return None

    try:
        translated = GoogleTranslator(source=source_lang, target='tr').translate(text)
    except Exception:
        _logger.warning("Translation failed; keeping previous subtitle", exc_info=True)
        return None
    # Normalize None / empty results so callers can use a simple truth test.
    return translated or None


def _draw_subtitle(frame_bgr, subtitle, font, frame_height):
    """Return a copy of the BGR frame with *subtitle* drawn near the bottom."""
    # Text is rendered through Pillow with a TrueType font (presumably so that
    # non-ASCII Turkish characters render correctly).
    canvas = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    ImageDraw.Draw(canvas).text(
        (30, frame_height - 60), subtitle, font=font, fill=_SUBTITLE_COLOR
    )
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)


def process_video(video_file):
    """Burn Turkish translations of on-screen text into a copy of a video.

    Every 30th frame is OCR'd; any recognized text is translated to Turkish
    and appended to a running subtitle, whose last 120 characters are drawn
    on every subsequent frame.

    Args:
        video_file: Path of the input video, as passed to cv2.VideoCapture.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is
        ``"output_with_subs.mp4"`` on success and ``None`` when the input
        could not be opened or the writer could not be created.
    """
    output_path = "output_with_subs.mp4"

    cap = cv2.VideoCapture(video_file)
    out = None
    try:
        if not cap.isOpened():
            return "Video could not be opened!", None

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # An unknown frame rate is reported as 0, which is not a usable
            # rate for the writer.
            fps = _FALLBACK_FPS

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            return "VideoWriter could not be started!", None

        frame_count = 0
        subtitle = ''
        font = None  # loaded lazily, once, when the first subtitle appears

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % _OCR_EVERY_N_FRAMES == 0:
                text = _extract_text(frame)
                if text.strip():
                    translated = _translate_to_turkish(text)
                    if translated:
                        # Keeping only the tail is equivalent to joining every
                        # translation and slicing, but uses bounded memory.
                        subtitle = f"{subtitle} {translated}" if subtitle else translated
                        subtitle = subtitle[-_MAX_SUBTITLE_CHARS:]

            if subtitle:
                if font is None:
                    font = _load_subtitle_font()
                frame = _draw_subtitle(frame, subtitle, font, height)

            if frame.shape[1] != width or frame.shape[0] != height:
                frame = cv2.resize(frame, (width, height))
            # Every decoded frame is written, even when OCR or translation
            # failed, so the output keeps the input's length.
            out.write(frame)
    finally:
        # Release handles on every exit path, including early returns.
        cap.release()
        if out is not None:
            out.release()

    return "Success!", output_path

# Gradio UI: one video upload in, a status message plus the subtitled video
# file out. gr.Video hands the handler a file path on its own; it has no
# `type` parameter, so none is passed.
demo = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload a video"),
    outputs=[gr.Textbox(label="Status"), gr.File(label="Download video with subtitles")],
    title="Video OCR & Translation Subtitle Generator",
    description="Upload a video. The app will extract text, translate it to Turkish, and add it as subtitles."
)

if __name__ == "__main__":
    demo.launch()