Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -737,7 +737,6 @@ def process_segment_with_gpt(segment, source_lang, target_lang, model="gpt-4", o
|
|
737 |
"""
|
738 |
Processes a single text segment: restores punctuation and translates using an OpenAI GPT model.
|
739 |
"""
|
740 |
-
# Essential check: Ensure the OpenAI client is provided
|
741 |
if openai_client is None:
|
742 |
segment_identifier = f"{segment.get('start', 'N/A')}-{segment.get('end', 'N/A')}"
|
743 |
logger.error(f"❌ OpenAI client was not provided for segment {segment_identifier}. Cannot process.")
|
@@ -767,31 +766,46 @@ def process_segment_with_gpt(segment, source_lang, target_lang, model="gpt-4", o
|
|
767 |
|
768 |
try:
|
769 |
logger.debug(f"Sending request to OpenAI model '{model}' for segment {segment_id}...")
|
770 |
-
response = openai_client.chat.completions.create(
|
771 |
model=model,
|
772 |
messages=[{"role": "user", "content": prompt}],
|
773 |
temperature=0.3
|
774 |
)
|
775 |
content = response.choices[0].message.content.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
776 |
logger.debug(
|
777 |
-
f"
|
778 |
-
f"'{
|
779 |
)
|
780 |
|
781 |
result_json = {}
|
782 |
try:
|
783 |
-
|
784 |
-
result_json = json.loads(content)
|
785 |
except json.JSONDecodeError as e:
|
786 |
logger.warning(
|
787 |
f"⚠️ Failed to parse JSON response for segment {segment_id}. Error: {e}. "
|
788 |
-
f"
|
789 |
)
|
790 |
-
# Fallback behavior if JSON parsing fails: use original text, empty translation
|
791 |
punctuated_text = original_text
|
792 |
-
translated_text = ""
|
793 |
else:
|
794 |
-
# If JSON parsing was successful
|
795 |
punctuated_text = result_json.get("punctuated", original_text)
|
796 |
translated_text = result_json.get("translated", "")
|
797 |
|
@@ -808,12 +822,10 @@ def process_segment_with_gpt(segment, source_lang, target_lang, model="gpt-4", o
|
|
808 |
"translated": translated_text
|
809 |
}
|
810 |
except Exception as e:
|
811 |
-
# Log the full traceback using exc_info=True for better debugging
|
812 |
logger.error(
|
813 |
-
f"❌ An unexpected error occurred
|
814 |
-
exc_info=True
|
815 |
)
|
816 |
-
# Return the original segment with an empty translated text on error
|
817 |
return {
|
818 |
"start": segment["start"],
|
819 |
"end": segment["end"],
|
|
|
737 |
"""
|
738 |
Processes a single text segment: restores punctuation and translates using an OpenAI GPT model.
|
739 |
"""
|
|
|
740 |
if openai_client is None:
|
741 |
segment_identifier = f"{segment.get('start', 'N/A')}-{segment.get('end', 'N/A')}"
|
742 |
logger.error(f"❌ OpenAI client was not provided for segment {segment_identifier}. Cannot process.")
|
|
|
766 |
|
767 |
try:
|
768 |
logger.debug(f"Sending request to OpenAI model '{model}' for segment {segment_id}...")
|
769 |
+
response = openai_client.chat.completions.create(
|
770 |
model=model,
|
771 |
messages=[{"role": "user", "content": prompt}],
|
772 |
temperature=0.3
|
773 |
)
|
774 |
content = response.choices[0].message.content.strip()
|
775 |
+
|
776 |
+
# --- NEW LOGIC: Clean markdown code block fences from the response ---
|
777 |
+
cleaned_content = content
|
778 |
+
if content.startswith("```") and content.endswith("```"):
|
779 |
+
# Attempt to find the actual JSON object within the markdown fence
|
780 |
+
json_start_index = content.find('{')
|
781 |
+
json_end_index = content.rfind('}')
|
782 |
+
|
783 |
+
if json_start_index != -1 and json_end_index != -1 and json_end_index > json_start_index:
|
784 |
+
cleaned_content = content[json_start_index : json_end_index + 1]
|
785 |
+
logger.debug(f"Removed markdown fences for segment {segment_id}. Extracted JSON portion.")
|
786 |
+
else:
|
787 |
+
logger.warning(
|
788 |
+
f"⚠️ Content starts/ends with '```' but a valid JSON object ({{...}}) was not found within "
|
789 |
+
f"fences for segment {segment_id}. Attempting to parse raw content. Raw content: '{content}'"
|
790 |
+
)
|
791 |
+
# --- END NEW LOGIC ---
|
792 |
+
|
793 |
logger.debug(
|
794 |
+
f"Attempting to parse JSON for segment {segment_id}. "
|
795 |
+
f"Content for parsing preview: '{cleaned_content[:200]}{'...' if len(cleaned_content) > 200 else ''}'"
|
796 |
)
|
797 |
|
798 |
result_json = {}
|
799 |
try:
|
800 |
+
result_json = json.loads(cleaned_content)
|
|
|
801 |
except json.JSONDecodeError as e:
|
802 |
logger.warning(
|
803 |
f"⚠️ Failed to parse JSON response for segment {segment_id}. Error: {e}. "
|
804 |
+
f"Content attempted to parse: '{cleaned_content}'" # Log cleaned content here
|
805 |
)
|
|
|
806 |
punctuated_text = original_text
|
807 |
+
translated_text = "" # Return empty translated text on parsing failure
|
808 |
else:
|
|
|
809 |
punctuated_text = result_json.get("punctuated", original_text)
|
810 |
translated_text = result_json.get("translated", "")
|
811 |
|
|
|
822 |
"translated": translated_text
|
823 |
}
|
824 |
except Exception as e:
|
|
|
825 |
logger.error(
|
826 |
+
f"❌ An unexpected error occurred for segment {segment_id}: {e}",
|
827 |
+
exc_info=True # This logs the full traceback
|
828 |
)
|
|
|
829 |
return {
|
830 |
"start": segment["start"],
|
831 |
"end": segment["end"],
|