from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
import gradio as gr
from transformers import pipeline, MarianMTModel, MarianTokenizer
import tempfile
from langdetect import detect
import boto3
# Additional imports for loading Wikipedia content and the QA chain
from langchain_community.document_loaders import WikipediaLoader
from langchain.chains.question_answering import load_qa_chain
# Import re for the translate function, to split text into sentences and avoid hitting token limits
import re

#Get keys #########################################################################################
load_dotenv()

# Set the model name for our LLMs.
OPENAI_MODEL = "gpt-3.5-turbo"
# Store the API key in a variable.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

#Define variables for AWS Polly Access#############################################################
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
aws_default_region = os.getenv('AWS_DEFAULT_REGION')

#Define language variables ###################################################################################

#Define voice map from language code to an Amazon Polly neural voice.
#Note: "jap" and "trk" follow the codes used elsewhere in this app (see
#language_map below) rather than ISO 639-1 ("ja", "tr"), so output from
#langdetect may not match these keys directly.
voice_map = {
    "ar": "Hala",
    "en": "Gregory",
    "es": "Mia",
    "fr": "Liam",
    "de": "Vicki",
    "it": "Bianca",
    "zh": "Hiujin",
    "hi": "Kajal",
    "jap": "Tomoko",
    "trk": "Burcu"
}

#Define language map from full names to ISO-style codes
language_map = {
    "Arabic (Gulf)": "ar",
    "Chinese (Cantonese)": "zh",
    "English": "en",
    "French": "fr",
    "German": "de",
    "Hindi": "hi",
    "Italian": "it",
    "Japanese": "jap",
    "Spanish": "es",
    "Turkish": "trk"
}

# Dropdown listing the languages available for translation
languages = gr.Dropdown(
    label="Click in the middle of the dropdown bar to select translation language", 
    choices=list(language_map.keys()))

#Define default language
default_language = "English"

#Setting the Chatbot Model #################################################################################

#Instantiate the LLM we'll use and the arguments to pass.
#This is done at module level, not inside a function definition, to improve the
#speed and efficiency of the app: the model is not re-instantiated every time a
#new question is submitted. The same setup is used for all of the models below,
#as part of our optimization work to make the app more efficient and effective.
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name=OPENAI_MODEL, temperature=0.0)

# Define the wikipedia topic as a string.
wiki_topic = "diabetes"

# Load the Wikipedia results as documents, using a max of 2.
# Error handling is included in case the documents fail to load.
try:
    documents = WikipediaLoader(query=wiki_topic, load_max_docs=2, load_all_available_meta=True).load()
except Exception as e:
    print("Failed to load documents:", str(e))
    documents = []
# Create the QA chain using the LLM.
chain = load_qa_chain(llm)

##############################################################################################################

#Define the function to call the OpenAI chat LLM
def handle_query(user_query):
    if not documents:
        return "Source documents could not be loaded; please try again later."

    if user_query.lower() == 'quit':
        return "Goodbye!"

    try:
        # Pass the documents and the user's query to the chain, and return the result.
        result = chain.invoke({"input_documents": documents, "question": user_query})
        return result["output_text"] if result["output_text"].strip() else "No answer found, try a different question."
        
    except Exception as e:
        return "An error occurred while searching for the answer: " + str(e)
    
#Language models and functions ############################################################################

#Set up a cache so translation models are initialized once at module level, improving app speed.

#Global cache mapping a Helsinki model name to its (tokenizer, model) pair
helsinki_model_cache = {}

def get_helsinki_model_and_tokenizer(src_lang, target_lang):
    helsinki_model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{target_lang}"
    if helsinki_model_name not in helsinki_model_cache:
        tokenizer = MarianTokenizer.from_pretrained(helsinki_model_name)
        model = MarianMTModel.from_pretrained(helsinki_model_name)
        helsinki_model_cache[helsinki_model_name] = (tokenizer, model)
    return helsinki_model_cache[helsinki_model_name]
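
# Usage sketch (illustrative): the first call below loads and caches the en->es
# Marian pair; repeated calls with the same pair are served from
# helsinki_model_cache without reloading the weights:
#     tokenizer, model = get_helsinki_model_and_tokenizer("en", "es")
#     tokenizer_again, model_again = get_helsinki_model_and_tokenizer("en", "es")
#     assert model is model_again  # same cached object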

#Define function to translate text into the specified target language
def translate(transcribed_text, target_lang="es"):
    try:
        #Define the model and tokenizer
        src_lang = detect(transcribed_text)
        tokenizer, model = get_helsinki_model_and_tokenizer(src_lang, target_lang)
        max_length = tokenizer.model_max_length

        # Split text based on sentence endings to better manage translation segments
        # This is done because in previous iterations of the app, some translations hit 
        # the max number of tokens and the output was truncated. This is part of our 
        # evaluation and optimization process
        sentences = re.split(r'(?<=[.!?]) +', transcribed_text)
        full_translation = ""

        # Process each sentence individually. truncation=True caps any single
        # sentence at the model's max length, so no segment exceeds the token limit.
        for sentence in sentences:
            tokens = tokenizer.encode(sentence, return_tensors="pt", truncation=True, max_length=max_length)
            translated_tokens = model.generate(tokens)
            segment_translation = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
            full_translation += segment_translation + " "

        return full_translation.strip()

    except Exception as e:
        print(f"An error occurred: {e}")
        return "Error in transcription or translation"
        

#Initialize Whisper model at the module level to be used across different calls
transcription_pipeline = None

def initialize_transcription_model():
    global transcription_pipeline
    if transcription_pipeline is None:
        transcription_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large")

#Define function to transcribe audio to text using Whisper, in the language it was spoken
def transcribe_audio_original(audio_filepath):
    try:
        if transcription_pipeline is None:
            initialize_transcription_model()
        transcription_result = transcription_pipeline(audio_filepath)
        transcribed_text = transcription_result['text']
        return transcribed_text
    except Exception as e:
        print(f"an error occured: {e}")
        return "Error in transcription"


#Initialize Polly client at module level
polly_client = boto3.client(
    'polly',
    region_name=aws_default_region,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key
)

# Define text-to-speech function using Amazon Polly
def polly_text_to_speech(text, lang_code):
    try:
        # Get the appropriate voice ID from the mapping; fall back to the
        # English voice if the detected code has no mapped Polly voice
        # (the fallback choice is an assumption added to avoid a KeyError)
        voice_id = voice_map.get(lang_code, "Gregory")

        # Request speech synthesis
        response = polly_client.synthesize_speech(
            Engine='neural',
            Text=text,
            OutputFormat='mp3',
            VoiceId=voice_id
        )

        # Save the audio to a temporary file and return its path
        if "AudioStream" in response:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as audio_file:
                audio_file.write(response['AudioStream'].read())
                return audio_file.name
    except boto3.exceptions.Boto3Error as e:
        print(f"Error accessing Polly: {e}")
    return None  # Return None if there was an error
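
# Usage sketch (illustrative): synthesizing a short English reply returns the
# path of a temporary .mp3 that a Gradio Audio component can play:
#     audio_path = polly_text_to_speech("Hello, how can I help?", "en")
# A None return signals a Polly error and is handled by the callers below.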



#Define function to submit a query to the Wikipedia-backed chain and feed the Gradio app
def submit_question(audio_filepath=None, typed_text=None, target_lang=default_language):

    # Determine the source of the text: audio transcription or direct text input
    if not audio_filepath and not typed_text:
        return "Please provide input by typing or speaking", None

    # Typed text is used as-is; audio is first transcribed in the background.
    # When both are supplied, the typed text takes precedence.
    query_text = typed_text if typed_text else transcribe_audio_original(audio_filepath)

    detected_lang_code = detect(query_text)
    response_text = handle_query(query_text)
    response_speech = polly_text_to_speech(response_text, detected_lang_code)

    if not response_speech:
        response_speech = "No audio available"

    return response_text, response_speech

#Define function to transcribe audio and provide output in text and speech
def transcribe_and_speech(audio_filepath=None, typed_text=None, target_lang=default_language):

    # Determine the source of the text: audio transcription or direct text input
    if audio_filepath and typed_text:
        return "Please use only one input method at a time", None

    if typed_text:
        # Convert typed text directly to speech
        detected_lang_code = detect(typed_text)
        original_speech = polly_text_to_speech(typed_text, detected_lang_code)
        return None, original_speech

    elif audio_filepath:
        # Transcribe audio to text, then synthesize the text back to speech
        query_text = transcribe_audio_original(audio_filepath)
        detected_lang_code = detect(query_text)
        original_speech = polly_text_to_speech(query_text, detected_lang_code)
        return query_text, original_speech

    # Neither input was provided
    return "Please provide input by typing or speaking.", None


#Define function to translate the response into the target language, in text and audio
def translate_and_speech(response_text=None, target_lang=default_language):

    # Detect the language of the input text
    detected_lang_code = detect(response_text)

    # Check if a target language is specified; default to English if not
    target_lang_code = language_map.get(target_lang, "en")

    # Translate only if the detected and target language codes differ
    # (comparing codes directly avoids a crash when the detected code has
    # no entry in language_map)
    if detected_lang_code == target_lang_code:
        translated_response = response_text
    else:
        translated_response = translate(response_text, target_lang_code)

    # Convert the translation to speech
    translated_speech = polly_text_to_speech(translated_response, target_lang_code)

    return translated_response, translated_speech


# Function to clear out all inputs
def clear_inputs():
    return None, None, None, None, None, None
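
# Minimal UI sketch (an assumption, not the original app's wiring): one plausible
# way to connect the handlers above to a Gradio Blocks layout. The component
# names and layout are illustrative; the actual interface may differ.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        audio_in = gr.Audio(type="filepath", label="Speak your question")
        text_in = gr.Textbox(label="Or type your question")
        target = gr.Dropdown(
            label="Translation language",
            choices=list(language_map.keys()),
            value=default_language)
        answer_text = gr.Textbox(label="Answer")
        answer_audio = gr.Audio(label="Answer (speech)")
        translated_text = gr.Textbox(label="Translated answer")
        translated_audio = gr.Audio(label="Translated answer (speech)")

        ask_btn = gr.Button("Ask")
        translate_btn = gr.Button("Translate answer")
        clear_btn = gr.Button("Clear")

        # Ask: transcribe/accept the question, query the chain, speak the answer
        ask_btn.click(
            submit_question,
            inputs=[audio_in, text_in, target],
            outputs=[answer_text, answer_audio])
        # Translate: render the answer in the selected language, text and audio
        translate_btn.click(
            translate_and_speech,
            inputs=[answer_text, target],
            outputs=[translated_text, translated_audio])
        # Clear: reset all six components (matches clear_inputs' six Nones)
        clear_btn.click(
            clear_inputs,
            outputs=[audio_in, text_in, answer_text, answer_audio,
                     translated_text, translated_audio])

    demo.launch()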