Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -657,9 +657,24 @@ Return only valid JSON. Do not include explanation, markdown, or comments.
|
|
657 |
for i, item in enumerate(podcast_json['podcast']):
|
658 |
try:
|
659 |
add_log(f"🎵 Processing line {i+1}/{total_lines}: Speaker {item['speaker']}")
|
660 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
661 |
audio_file = await self.tts_generate(
|
662 |
-
|
|
|
663 |
item['speaker'],
|
664 |
speaker1,
|
665 |
speaker2
|
|
|
657 |
for i, item in enumerate(podcast_json['podcast']):
|
658 |
try:
|
659 |
add_log(f"🎵 Processing line {i+1}/{total_lines}: Speaker {item['speaker']}")
|
660 |
+
clean_line = item['line']
|
661 |
+
|
662 |
+
# 🔧 Sanitize malformed lines
|
663 |
+
if not isinstance(clean_line, str) or len(clean_line.strip()) == 0 or clean_line.strip().startswith('"') or "{" in clean_line:
|
664 |
+
add_log(f"⚠️ Malformed line detected for speaker {item['speaker']}: {repr(clean_line[:80])}")
|
665 |
+
# Try to recover from JSON-like noise
|
666 |
+
candidates = re.findall(r'\"line\"\s*:\s*\"([^\"]+)\"', clean_line)
|
667 |
+
if candidates:
|
668 |
+
clean_line = candidates[0]
|
669 |
+
add_log(f"✅ Recovered line: {clean_line}")
|
670 |
+
else:
|
671 |
+
# Fallback: strip bad characters
|
672 |
+
clean_line = re.sub(r'[^A-Za-z0-9\s.,!?;:\-\'"]+', '', clean_line)
|
673 |
+
add_log(f"🛠️ Cleaned fallback line: {clean_line}")
|
674 |
+
|
675 |
audio_file = await self.tts_generate(
|
676 |
+
clean_line,
|
677 |
+
#item['line'],
|
678 |
item['speaker'],
|
679 |
speaker1,
|
680 |
speaker2
|