Essi committed on
Commit
98f5036
·
1 Parent(s): e773514

fix: update YouTube transcript retrieval method and clean up normalization logic in GAIAAgent

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -124,10 +124,10 @@ def youtube_transcript(url: str, num_first_chars: int = 10_000) -> str:
124
  if not video_id:
125
  return "yt_error: id"
126
  try:
127
- txt = " ".join(
128
- [x["text"] for x in YouTubeTranscriptApi.get_transcript(video_id.group(1))]
129
- )
130
- return txt[:num_first_chars]
131
  except Exception as e:
132
  return f"yt_error: {e}"
133
 
@@ -352,8 +352,8 @@ class GAIAAgent:
352
  raw = num.group(0)
353
 
354
  # 2️⃣ Normalize Yes / No
355
- if raw.lower().strip(".") in {"yes", "no"}:
356
- raw = raw.capitalize()
357
 
358
  # 3️⃣ Remove leading 'User:', 'Answer:', etc.
359
  raw = re.sub(r"^(User|Answer|Context):\s*", "", raw, flags=re.I)
 
124
  if not video_id:
125
  return "yt_error: id"
126
  try:
127
+ ytt_api = YouTubeTranscriptApi()
128
+ fetched_transcript = ytt_api.fetch(video_id=video_id.group(1)).to_raw_data()
129
+ transcript_str = " ".join([x["text"] for x in fetched_transcript])
130
+ return transcript_str[:num_first_chars]
131
  except Exception as e:
132
  return f"yt_error: {e}"
133
 
 
352
  raw = num.group(0)
353
 
354
  # 2️⃣ Normalize Yes / No
355
+ # if raw.lower().strip(".") in {"yes", "no"}:
356
+ # raw = raw.capitalize()
357
 
358
  # 3️⃣ Remove leading 'User:', 'Answer:', etc.
359
  raw = re.sub(r"^(User|Answer|Context):\s*", "", raw, flags=re.I)