mmccanse committed on
Commit
fd974ae
·
1 Parent(s): f93363c

update comments in functions file.

Browse files
Files changed (1) hide show
  1. functions_mm.py +7 -4
functions_mm.py CHANGED
@@ -74,6 +74,10 @@ default_language = "English"
74
  #Setting the Chatbot Model #################################################################################
75
 
76
  #Instantiating the llm we'll use and the arguments to pass
 
 
 
 
77
  llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name=OPENAI_MODEL, temperature=0.0)
78
 
79
  # Define the wikipedia topic as a string.
@@ -131,6 +135,9 @@ def translate(transcribed_text, target_lang="es"):
131
  max_length = tokenizer.model_max_length
132
 
133
  # Split text based on sentence endings to better manage translation segments
 
 
 
134
  sentences = re.split(r'(?<=[.!?]) +', transcribed_text)
135
  full_translation = ""
136
 
@@ -239,10 +246,6 @@ def submit_question (audio_filepath=None, typed_text=None, target_lang=default_l
239
  response_speech = "No audio available"
240
 
241
 
242
- #Map detected language code to language name
243
- # detected_lang = [key for key, value in language_map.items() if value == detected_lang_code][0]
244
-
245
-
246
  return response_text, response_speech
247
 
248
  #Define function to transcribe audio and provide output in text and speech
 
74
  #Setting the Chatbot Model #################################################################################
75
 
76
  #Instantiating the llm we'll use and the arguments to pass
77
+ #This is done at a global level, and not within the definition of a function to improve
78
+ #the speed and efficiency of the app. Thus, the model will not be instantiated every time
79
+ #a new question is submitted. Similar setup is created for all of the models called. This
80
+ #was part of our optimization process to help the app be more efficient and effective.
81
  llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name=OPENAI_MODEL, temperature=0.0)
82
 
83
  # Define the wikipedia topic as a string.
 
135
  max_length = tokenizer.model_max_length
136
 
137
  # Split text based on sentence endings to better manage translation segments
138
+ # This is done because in previous iterations of the app, some translations hit
139
+ # the max number of tokens and the output was truncated. This is part of our
140
 + # evaluation and optimization process.
141
  sentences = re.split(r'(?<=[.!?]) +', transcribed_text)
142
  full_translation = ""
143
 
 
246
  response_speech = "No audio available"
247
 
248
 
 
 
 
 
249
  return response_text, response_speech
250
 
251
  #Define function to transcribe audio and provide output in text and speech