Spaces:
Sleeping
Sleeping
Essi
committed on
Commit
·
98f5036
1
Parent(s):
e773514
fix: update YouTube transcript retrieval method and clean up normalization logic in GAIAAgent
Browse files
app.py
CHANGED
@@ -124,10 +124,10 @@ def youtube_transcript(url: str, num_first_chars: int = 10_000) -> str:
|
|
124 |
if not video_id:
|
125 |
return "yt_error: id"
|
126 |
try:
|
127 |
-
|
128 |
-
|
129 |
-
)
|
130 |
-
return
|
131 |
except Exception as e:
|
132 |
return f"yt_error: {e}"
|
133 |
|
@@ -352,8 +352,8 @@ class GAIAAgent:
|
|
352 |
raw = num.group(0)
|
353 |
|
354 |
# 2️⃣ Normalize Yes / No
|
355 |
-
if raw.lower().strip(".") in {"yes", "no"}:
|
356 |
-
|
357 |
|
358 |
# 3️⃣ Remove leading 'User:', 'Answer:', etc.
|
359 |
raw = re.sub(r"^(User|Answer|Context):\s*", "", raw, flags=re.I)
|
|
|
124 |
if not video_id:
|
125 |
return "yt_error: id"
|
126 |
try:
|
127 |
+
ytt_api = YouTubeTranscriptApi()
|
128 |
+
fetched_transcript = ytt_api.fetch(video_id=video_id.group(1)).to_raw_data()
|
129 |
+
transcript_str = " ".join([x["text"] for x in fetched_transcript])
|
130 |
+
return transcript_str[:num_first_chars]
|
131 |
except Exception as e:
|
132 |
return f"yt_error: {e}"
|
133 |
|
|
|
352 |
raw = num.group(0)
|
353 |
|
354 |
# 2️⃣ Normalize Yes / No
|
355 |
+
# if raw.lower().strip(".") in {"yes", "no"}:
|
356 |
+
# raw = raw.capitalize()
|
357 |
|
358 |
# 3️⃣ Remove leading 'User:', 'Answer:', etc.
|
359 |
raw = re.sub(r"^(User|Answer|Context):\s*", "", raw, flags=re.I)
|