import gradio as gr
import joblib
import re
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from langchain.chains import LLMChain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
)
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field


# 1. Translator
class TextTranslator(BaseModel):
    # Structured reply expected back from the chat model.
    output: str = Field(description="Python string containing the output text translated in the desired language")


output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
format_instructions = output_parser.get_format_instructions()

# Chat model used by the translator. Was referenced as `chat` but never
# defined in the original file; requires OPENAI_API_KEY in the environment.
chat = ChatOpenAI()


def text_translator(input_text: str, language: str) -> str:
    """Translate *input_text* into *language* via the chat model.

    Builds a prompt that embeds the parser's format instructions, sends it
    to the model, and parses the structured reply.

    Returns:
        The translated text extracted from the model's response.
    """
    human_template = """Enter the text that you want to translate: {input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(
        input_text=input_text,
        language=language,
        format_instructions=format_instructions,
    )
    messages = prompt.to_messages()
    # `.invoke` is the current LangChain calling convention; invoking the
    # model object directly with `messages=` keyword is deprecated.
    response = chat.invoke(messages)
    output = output_parser.parse(response.content)
    return output.output


# 2. Sentiment Analysis
classifier = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")


def sentiment_analysis(message, history):
    """Analyze the sentiment of a message.

    Returns the sentiment label together with its probability
    (string is user-facing and intentionally kept in Spanish).
    """
    result = classifier(message)
    return f"Sentimiento : {result[0]['label']} (Probabilidad: {result[0]['score']:.2f})"
# 3. Financial Analyst (LangChain with OpenAI, requires API key)
import os      # used below; was never imported at the top of the file
import spacy   # used below; was never imported at the top of the file

nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('sentencizer')


def split_in_sentences(text):
    """Split *text* into a list of stripped sentence strings using spaCy."""
    doc = nlp(text)
    return [str(sent).strip() for sent in doc.sents]


def make_spans(text, results):
    """Pair each sentence of *text* with the label of the matching result.

    Args:
        text: the raw input text.
        results: classifier outputs, one dict with a 'label' key per sentence.

    Returns:
        list of (sentence, label) tuples.
    """
    labels = [res['label'] for res in results]
    return list(zip(split_in_sentences(text), labels))


auth_token = os.environ.get("HF_Token")

## Speech Recognition
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")


def transcribe(audio):
    """Transcribe an audio input to text with wav2vec2."""
    return asr(audio)["text"]


def speech_to_text(speech):
    """Duplicate of transcribe(), kept because the UI wires it separately."""
    return asr(speech)["text"]


## Summarization
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")


def summarize_text(text):
    """Return an abstractive meeting-style summary of *text*."""
    return summarizer(text)[0]['summary_text']


## Fiscal Tone Analysis
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')


def text_to_sentiment(text):
    """Return the FinBERT tone label (e.g. Positive/Negative/Neutral)."""
    return fin_model(text)[0]["label"]


## Company Extraction
def fin_ner(text):
    """Run named-entity recognition through a hosted HF model interface."""
    api = gr.Interface.load("dslim/bert-base-NER", src='models', use_auth_token=auth_token)
    return api(text)


## Fiscal Sentiment by Sentence
def fin_ext(text):
    """Classify each sentence's fiscal tone and return (sentence, label) spans."""
    results = fin_model(split_in_sentences(text))
    return make_spans(text, results)


## Forward Looking Statement
# Built once at import time. The original constructed this inside fls() on
# every call, which was expensive and — worse — crashed at call time because
# the name `pipeline` is rebound to a joblib object later in the file.
fls_model = pipeline("text-classification", model="demo-org/finbert_fls",
                     tokenizer="demo-org/finbert_fls", use_auth_token=auth_token)


def fls(text):
    """Detect forward-looking statements per sentence; returns (sentence, label) spans."""
    results = fls_model(split_in_sentences(text))
    return make_spans(text, results)
Personal Info Detection def detect_pii(text): pii_patterns = { "email": r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", "phone": r"\+?\d[\d\-\s]{8,}\d", "credit_card": r"\b(?:\d[ -]*?){13,16}\b" } found = {} for label, pattern in pii_patterns.items(): matches = re.findall(pattern, text) if matches: found[label] = matches return found or "No personal information found." # 5. Telco Customer Churn Prediction script_dir = os.path.dirname(os.path.abspath(__file__)) pipeline_path = os.path.join(script_dir, 'toolkit', 'pipeline.joblib') model_path = os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib') # Load transformation pipeline and model pipeline = joblib.load(pipeline_path) model = joblib.load(model_path) # Create a function to calculate TotalCharges def calculate_total_charges(tenure, monthly_charges): return tenure * monthly_charges # Create a function that applies the ML pipeline and makes predictions def predict(SeniorCitizen, Partner, Dependents, tenure, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport, StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod, MonthlyCharges): # Calculate TotalCharges TotalCharges = calculate_total_charges(tenure, MonthlyCharges) # Create a dataframe with the input data input_df = pd.DataFrame({ 'SeniorCitizen': [SeniorCitizen], 'Partner': [Partner], 'Dependents': [Dependents], 'tenure': [tenure], 'InternetService': [InternetService], 'OnlineSecurity': [OnlineSecurity], 'OnlineBackup': [OnlineBackup], 'DeviceProtection': [DeviceProtection], 'TechSupport': [TechSupport], 'StreamingTV': [StreamingTV], 'StreamingMovies': [StreamingMovies], 'Contract': [Contract], 'PaperlessBilling': [PaperlessBilling], 'PaymentMethod': [PaymentMethod], 'MonthlyCharges': [MonthlyCharges], 'TotalCharges': [TotalCharges] }) # Selecting categorical and numerical columns separately cat_cols = [col for col in input_df.columns if input_df[col].dtype == 'object'] num_cols = [col for col in 
input_df.columns if input_df[col].dtype != 'object'] X_processed = pipeline.transform(input_df) # Extracting feature names for categorical columns after one-hot encoding cat_encoder = pipeline.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot'] cat_feature_names = cat_encoder.get_feature_names_out(cat_cols) # Concatenating numerical and categorical feature names feature_names = num_cols + list(cat_feature_names) # Convert X_processed to DataFrame final_df = pd.DataFrame(X_processed, columns=feature_names) # Extract the first three columns and remaining columns, then merge first_three_columns = final_df.iloc[:, :3] remaining_columns = final_df.iloc[:, 3:] final_df = pd.concat([remaining_columns, first_three_columns], axis=1) # Make predictions using the model prediction_probs = model.predict_proba(final_df)[0] prediction_label = { "Prediction: CHURN 🔴": prediction_probs[1], "Prediction: STAY ✅": prediction_probs[0] } return prediction_label input_interface = [] # Gradio UI setup with gr.Blocks() as demo: with gr.Tab("Translator"): gr.HTML("