podcastgen

Running on L40S

App Files Files Community

Rausda6 committed 17 days ago

Commit

161f5a3

verified ·

1 Parent(s): 39f08bf

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -13

app.py CHANGED Viewed

@@ -165,11 +165,14 @@ Take the following input conversation, and reformat it so that:
 - No timestamps, names, parentheses, or extra formatting, no chapter names, no special characters beside ":"
 - No blank lines
 - Do not invent or change the content
 Example output:
 Speaker 1: Hello and welcome.
 Speaker 2: Thanks! Glad to be here.
 Now format the following:
 {raw_text}
 """
@@ -237,21 +240,28 @@ Now format the following:
         add_log("⚠️ No valid JSON found, creating fallback")
         return self.create_fallback_podcast(text)
     def conversation_to_json(self, text: str) -> Dict:
         """Convert speaker-formatted text to podcast JSON structure"""
         # Allow leading whitespace and enforce full line match
-        cleaned_lines = []
-        for line in text.splitlines():
-            if re.match(r'^\s*Speaker\s*[12]\s*:', line.strip()):
-                cleaned_lines.append(line.strip())
-        podcast = []
-        for line in cleaned_lines:
-            match = re.match(r'^Speaker\s*([12])\s*:\s*(.+)', line)
-            if match:
-                speaker, content = match.groups()
-                podcast.append({"speaker": int(speaker), "line": content.strip()})
         return {
             "topic": "Generated from Input",

 - No timestamps, names, parentheses, or extra formatting, no chapter names, no special characters beside ":"
 - No blank lines
 - Do not invent or change the content
+- you are not allowed to use anywhere in the text the character +#-*<>"()[]
 Example output:
 Speaker 1: Hello and welcome.
 Speaker 2: Thanks! Glad to be here.
+Speaker 1: ...
+Speaker 2: ...
+Speaker 1: ...
+Speaker 2: ...
 Now format the following:
 {raw_text}
 """
         add_log("⚠️ No valid JSON found, creating fallback")
         return self.create_fallback_podcast(text)
+    def normalize_speaker_lines(self,text: str) -> str:
+        """Normalize lines to 'Speaker 1: text' format based on presence of 1 or 2 and a ':' or '-'."""
+        # Convert markdown and bracketed formats to 'Speaker X: ...'
+        text = re.sub(
+            r'(?i)^.*?([12])[^a-zA-Z0-9]*[:\-]\s*',
+            lambda m: f"Speaker {m.group(1)}: ",
+            text,
+            flags=re.MULTILINE
+        )
+        return text
     def conversation_to_json(self, text: str) -> Dict:
         """Convert speaker-formatted text to podcast JSON structure"""
         # Allow leading whitespace and enforce full line match
+        """Convert speaker-formatted text to podcast JSON structure"""
+        text = self.normalize_speaker_lines(text)
+        # Match strict "Speaker X: ..." lines only
+        lines = re.findall(r'^Speaker\s+([12]):\s*(.+)', text, flags=re.MULTILINE)
+        podcast = [{"speaker": int(s), "line": l.strip()} for s, l in lines]
         return {
             "topic": "Generated from Input",