Rausda6 committed on
Commit
161f5a3
·
verified ·
1 Parent(s): 39f08bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -13
app.py CHANGED
@@ -165,11 +165,14 @@ Take the following input conversation, and reformat it so that:
165
  - No timestamps, names, parentheses, or extra formatting, no chapter names, no special characters beside ":"
166
  - No blank lines
167
  - Do not invent or change the content
168
-
169
  Example output:
170
  Speaker 1: Hello and welcome.
171
  Speaker 2: Thanks! Glad to be here.
172
-
 
 
 
173
  Now format the following:
174
  {raw_text}
175
  """
@@ -237,21 +240,28 @@ Now format the following:
237
  add_log("⚠️ No valid JSON found, creating fallback")
238
  return self.create_fallback_podcast(text)
239
 
 
 
 
 
 
 
 
 
 
 
 
240
  def conversation_to_json(self, text: str) -> Dict:
241
 
242
  """Convert speaker-formatted text to podcast JSON structure"""
243
  # Allow leading whitespace and enforce full line match
244
- cleaned_lines = []
245
- for line in text.splitlines():
246
- if re.match(r'^\s*Speaker\s*[12]\s*:', line.strip()):
247
- cleaned_lines.append(line.strip())
248
-
249
- podcast = []
250
- for line in cleaned_lines:
251
- match = re.match(r'^Speaker\s*([12])\s*:\s*(.+)', line)
252
- if match:
253
- speaker, content = match.groups()
254
- podcast.append({"speaker": int(speaker), "line": content.strip()})
255
 
256
  return {
257
  "topic": "Generated from Input",
 
165
  - No timestamps, names, parentheses, or extra formatting, no chapter names, no special characters beside ":"
166
  - No blank lines
167
  - Do not invent or change the content
168
+ - you are not allowed to use anywhere in the text the character +#-*<>"()[]
169
  Example output:
170
  Speaker 1: Hello and welcome.
171
  Speaker 2: Thanks! Glad to be here.
172
+ Speaker 1: ...
173
+ Speaker 2: ...
174
+ Speaker 1: ...
175
+ Speaker 2: ...
176
  Now format the following:
177
  {raw_text}
178
  """
 
240
  add_log("⚠️ No valid JSON found, creating fallback")
241
  return self.create_fallback_podcast(text)
242
 
243
+ def normalize_speaker_lines(self,text: str) -> str:
244
+ """Normalize lines to 'Speaker 1: text' format based on presence of 1 or 2 and a ':' or '-'."""
245
+ # Convert markdown and bracketed formats to 'Speaker X: ...'
246
+ text = re.sub(
247
+ r'(?i)^.*?([12])[^a-zA-Z0-9]*[:\-]\s*',
248
+ lambda m: f"Speaker {m.group(1)}: ",
249
+ text,
250
+ flags=re.MULTILINE
251
+ )
252
+ return text
253
+
254
  def conversation_to_json(self, text: str) -> Dict:
255
 
256
  """Convert speaker-formatted text to podcast JSON structure"""
257
  # Allow leading whitespace and enforce full line match
258
+ """Convert speaker-formatted text to podcast JSON structure"""
259
+ text = self.normalize_speaker_lines(text)
260
+
261
+
262
+ # Match strict "Speaker X: ..." lines only
263
+ lines = re.findall(r'^Speaker\s+([12]):\s*(.+)', text, flags=re.MULTILINE)
264
+ podcast = [{"speaker": int(s), "line": l.strip()} for s, l in lines]
 
 
 
 
265
 
266
  return {
267
  "topic": "Generated from Input",