import logging

import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Set up logging to capture detailed errors
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the model and tokenizer
model_id = "Ct1tz/Codebert-Base-B2D4G5"

try:
    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        use_fast=False,       # Explicitly use the slow tokenizer (RobertaTokenizer)
        force_download=True,  # Force a fresh download to avoid a corrupted cache
        cache_dir=None,       # Use the default cache directory
    )
    logger.info("Tokenizer loaded successfully.")
except Exception as e:
    logger.error(f"Failed to load tokenizer: {e}")
    raise

try:
    logger.info("Loading model...")
    model = AutoModelForSequenceClassification.from_pretrained(model_id)
    logger.info("Model loaded successfully.")
except Exception as e:
    logger.error(f"Failed to load model: {e}")
    raise

# Create a text classification pipeline
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)


# Define the prediction function for Gradio
def predict(text):
    try:
        result = classifier(text)
        # Format the top prediction as "Label: ..., Score: ..."
        return f"Label: {result[0]['label']}, Score: {result[0]['score']:.4f}"
    except Exception as e:
        return f"Prediction error: {e}"


# Create the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here (e.g., 'I like you. I love you')"),
    outputs="text",
    title="Text Classification with CodeBERT",
    description="Enter text to classify using the Ct1tz/Codebert-Base-B2D4G5 model.",
)

# Launch the app
if __name__ == "__main__":
    logger.info("Launching Gradio interface...")
    iface.launch()
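
# Note on long inputs: RoBERTa-style encoders such as CodeBERT accept at most
# 512 tokens, so very long inputs can make the classifier raise an error
# instead of returning a prediction. A minimal sketch of one way to guard
# against this (an optional variant, not part of the original app; it assumes
# the text-classification pipeline forwards tokenizer kwargs such as
# `truncation` and `max_length`, which current transformers releases do):
#
#     result = classifier(text, truncation=True, max_length=512)
#
# Dropping this into predict() would silently truncate over-long inputs
# rather than surfacing a tokenization error to the user.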