File size: 4,670 Bytes
7dd982b
d3a1880
7dd982b
 
106c061
7dd982b
 
dd4e7fa
6ac3507
 
dd4e7fa
de17270
106c061
7dd982b
dd4e7fa
7dd982b
 
 
 
 
 
 
dd4e7fa
7dd982b
f06c20a
 
dd4e7fa
 
2b8e4f0
eec85c6
f06c20a
dd4e7fa
 
 
 
de17270
dd4e7fa
 
 
 
de17270
dd4e7fa
 
4a1e71c
cd6257a
4a1e71c
473c60d
dd4e7fa
de17270
473c60d
cd6257a
 
 
 
 
dd4e7fa
4a1e71c
911c558
cd6257a
dd4e7fa
473c60d
eefc307
 
2d955c8
eefc307
 
dd4e7fa
 
 
 
cd6257a
 
dd4e7fa
cd6257a
 
 
76e3793
dd4e7fa
eec85c6
 
 
106c061
dd4e7fa
2b8e4f0
106c061
 
2b8e4f0
cd6257a
 
 
d3a1880
 
 
 
 
 
cd6257a
 
d3a1880
 
 
 
729f48e
 
d3a1880
 
 
 
 
dd4e7fa
729f48e
 
b82995c
dd4e7fa
7dd982b
eec85c6
106c061
 
 
 
 
 
 
 
dd4e7fa
 
7dd982b
 
106c061
de17270
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
import tempfile, requests, os, subprocess
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from gtts import gTTS
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont
import ffmpeg
import textwrap
import shutil

# Prompt text sent to the chat model; ``{text}`` is filled with the article
# body when the chain is invoked.
_SUMMARY_TEMPLATE = """
Provide a crisp, promotional-style summary (under 50 words) of the following:

{text}

Summary:
"""

# The model client, prompt and chain are all built once at import time and
# shared by every request handled by this module.
llm = ChatOpenAI(temperature=0.3, model="gpt-3.5-turbo")
summary_prompt = PromptTemplate.from_template(_SUMMARY_TEMPLATE)
summary_chain = LLMChain(prompt=summary_prompt, llm=llm)


def extract_main_content(url):
    """Fetch *url* and return the text of its main paragraphs.

    Page chrome (nav/header/footer/aside) and non-content tags
    (script/style/noscript) are removed before paragraphs are collected.
    Only ``<p>`` elements whose text is longer than 60 characters are kept,
    and at most the first 20 of those are joined with newlines.

    Args:
        url: Address of the article to download.

    Returns:
        The joined paragraph text, or ``None`` when the page cannot be
        fetched (network error, malformed URL, HTTP error status) or when
        no paragraph passes the length filter.
    """
    try:
        resp = requests.get(url, timeout=10)
        # Without this an HTTP 4xx/5xx error page would be parsed and
        # summarised as if it were the article.
        resp.raise_for_status()
    except requests.RequestException:
        # Callers already treat ``None`` as "could not extract content".
        return None

    soup = BeautifulSoup(resp.content, "html.parser")
    for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
        tag.decompose()

    # Extract each paragraph's text once, then filter on its length.
    texts = (p.get_text() for p in soup.find_all("p"))
    paras = [t for t in texts if len(t) > 60]
    return "\n".join(paras[:20]) or None


def download_logo():
    """Download the logo image to a temporary ``.png`` file.

    The download is best-effort: any request or file-system failure yields
    ``None`` so callers can simply render without a logo.

    Returns:
        Path of the downloaded file, or ``None`` if the download failed.
        The caller owns the returned file and may delete it when done.
    """
    logo_url = "https://huggingface.co/spaces/csccorner/Link-to-video/resolve/main/csharplogo.png"
    local_path = None
    try:
        with requests.get(logo_url, stream=True, timeout=10) as r:
            # Refuse to save an HTTP error page under a .png name.
            r.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
                local_path = f.name
                # iter_content applies content-encoding decoding, unlike r.raw.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return local_path
    except (requests.RequestException, OSError):
        # Do not leave a partial or empty file behind on failure.
        if local_path and os.path.exists(local_path):
            try:
                os.remove(local_path)
            except OSError:
                pass
        return None


def create_slides(text, duration, output_folder, max_lines=6):
    """Render *text* as a sequence of 1280x720 PNG slides.

    The text is wrapped at 36 characters per line and split into slides of
    at most ``max_lines`` lines. Each slide has its lines centered on a dark
    background, with the logo (when it can be downloaded and decoded)
    pasted near the bottom-left corner.

    Args:
        text: Text to display. Empty text yields a single blank slide
            instead of failing with a division by zero.
        duration: Total running time, divided evenly between the slides.
        output_folder: Existing directory that receives ``slide_<i>.png``.
        max_lines: Maximum number of wrapped lines per slide.

    Returns:
        A list of ``(frame_path, per_slide_time)`` tuples in display order.
    """
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font = ImageFont.truetype(font_path, 48)
    line_gap = 20

    # Decode and scale the logo once rather than once per slide. The logo is
    # optional, so an unreadable file just disables it.
    logo = None
    logo_path = download_logo()
    if logo_path:
        try:
            with Image.open(logo_path) as src:
                logo = src.convert("RGBA")
            logo = logo.resize((160, int(160 * logo.size[1] / logo.size[0])))
        except (OSError, ValueError):
            logo = None
        finally:
            # The pixels are in memory now; the temp file is no longer needed.
            try:
                os.remove(logo_path)
            except OSError:
                pass

    chunks = textwrap.wrap(text or "", width=36)
    slides = ["\n".join(chunks[i:i + max_lines]) for i in range(0, len(chunks), max_lines)]
    if not slides:
        # Keep at least one slide so the division below is always defined.
        slides = [""]
    per_slide_time = duration / len(slides)
    slide_paths = []

    for i, slide_text in enumerate(slides):
        bg = Image.new("RGB", (1280, 720), color=(10, 20, 40))
        draw = ImageDraw.Draw(bg)

        lines = slide_text.split("\n") if slide_text else []
        # Measure every line once and reuse the boxes for layout and drawing.
        boxes = [font.getbbox(line) for line in lines]
        heights = [box[3] - box[1] for box in boxes]
        total_height = sum(heights) + max(len(lines) - 1, 0) * line_gap

        # Center the text block vertically, keeping at least a 20px top margin.
        y = max((720 - total_height) // 2, 20)
        for line, box, line_height in zip(lines, boxes, heights):
            w = box[2] - box[0]
            draw.text(((1280 - w) // 2, y), line, font=font, fill="white")
            y += line_height + line_gap

        if logo is not None:
            # Use the logo's own alpha channel as the paste mask.
            bg.paste(logo, (30, 630 - logo.size[1]), logo)

        frame_path = os.path.join(output_folder, f"slide_{i}.png")
        bg.save(frame_path)
        slide_paths.append((frame_path, per_slide_time))

    return slide_paths


def url_to_av_summary(url, duration):
    """Build a narrated slide video summarising the article at *url*.

    Steps: extract the article text, summarise it with the LLM chain,
    synthesise the summary to speech, render the summary as slides, join
    the slides into a silent video with the ffmpeg concat demuxer, then mux
    the video with the audio.

    Args:
        url: Article address entered by the user.
        duration: Total slide running time in seconds.

    Returns:
        ``(summary, video_path)`` on success. On failure the first element
        is a human-readable message and the second is ``None``.
    """
    if not url or not url.strip():
        return "Failed to extract article content.", None
    try:
        content = extract_main_content(url.strip())
    except requests.RequestException:
        # Unreachable host or malformed URL: same outcome as an empty page.
        content = None
    if not content:
        return "Failed to extract article content.", None

    summary = summary_chain.invoke({"text": content[:3000]})["text"].replace('"', '')[:300]
    if not summary.strip():
        # Nothing to speak or render.
        return "Failed to generate a summary.", None

    # All intermediate artefacts live in one directory so a single rmtree
    # cleans them up, whether rendering succeeds or fails.
    frame_dir = tempfile.mkdtemp()
    final_video = None
    try:
        audio_path = os.path.join(frame_dir, "voiceover.mp3")
        gTTS(text=summary).save(audio_path)

        slides = create_slides(summary, duration, frame_dir)

        concat_txt_path = os.path.join(frame_dir, "slides.txt")
        with open(concat_txt_path, "w") as f:
            for path, t in slides:
                f.write(f"file '{path}'\n")
                f.write(f"duration {t}\n")
            # The last frame is listed again after the final duration entry.
            f.write(f"file '{slides[-1][0]}'\n")

        concat_img = os.path.join(frame_dir, "video_input.mp4")
        # check=True surfaces a failed first pass here instead of letting the
        # second pass fail on a missing input file.
        subprocess.run([
            "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_txt_path,
            "-vsync", "vfr", "-pix_fmt", "yuv420p", concat_img
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)

        # Only the name is needed; close the handle right away. The file is
        # kept (delete=False) because it is the value handed back to the UI.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            final_video = tmp.name

        video_input = ffmpeg.input(concat_img)
        audio_input = ffmpeg.input(audio_path)
        # shortest=None is emitted as a bare "-shortest" flag.
        ffmpeg.output(video_input, audio_input, final_video,
                      vcodec='libx264', acodec='aac', pix_fmt='yuv420p', shortest=None
                      ).run(overwrite_output=True, quiet=True)

        return summary, final_video
    except (subprocess.CalledProcessError, ffmpeg.Error):
        if final_video and os.path.exists(final_video):
            os.remove(final_video)
        return "Failed to render the summary video.", None
    finally:
        shutil.rmtree(frame_dir, ignore_errors=True)


# Input widgets, in the positional order of url_to_av_summary(url, duration).
_url_box = gr.Textbox(label="Article URL")
_duration_choice = gr.Radio([5, 10], label="Video Duration (sec)", value=5)

# Output widgets, matching the (summary, video_path) pair the handler returns.
_summary_box = gr.Textbox(label="Summary")
_video_player = gr.Video(label="Generated AV Summary")

# Web UI that wires the widgets above to the summary pipeline.
iface = gr.Interface(
    fn=url_to_av_summary,
    inputs=[_url_box, _duration_choice],
    outputs=[_summary_box, _video_player],
    title="\U0001F3AE AV Summary Generator (Visual Promo Style)",
    description="Generates a 5/10 sec video summary from article URL with clean typography, audio voiceover, and C# Corner logo.",
)

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()