Spaces:
Build error
Update app.py
app.py
CHANGED
@@ -1,82 +1,90 @@
-# app.py
 import gradio as gr
-from huggingface_hub import InferenceClient

 # --- Configuration ---
-
-# The Inference API URL for a model is typically 'https://api-inference.huggingface.co/models/{model_id}'
-# You can also pass just the model_id if it's on the public API:
-CLIENT_MODEL_ID = "neuralnets/cf_codebot"
-client = InferenceClient(CLIENT_MODEL_ID)


 # --- Inference Function ---
-
-
-
-    max_tokens: int, # Use max_new_tokens for the actual generation length
-    temperature: float,
-    top_p: float,
-):
-    # The InferenceClient for text generation usually expects a direct string input
-    # and not necessarily the chat format (messages list) unless it's a specific chat model.
-    # For a text generation model like cf_codebot (which is GPT-2 based),
-    # you typically just send the input text.

-    # You might want to add a prompt structure here if your friend's model
-    # was fine-tuned with one, e.g., "Problem: {problem_statement}\nEditorial: "
-    prompt = problem_statement
-
-    # Call the Inference API for text generation
-    # The parameters might vary slightly depending on the specific model type
-    # but these are common for text generation.
-    # We use stream=False for now to get the full response at once for simplicity,
-    # as the model isn't designed for a chat interface, but rather a single generation.
     try:
-
-
-
-
             top_p=top_p,
-
-            #
-            stop_sequences=["<end_of_turn>"] # Add this if your friend's model reliably uses it
         )
-        # The response from text_generation is usually the generated string directly
-        # or a dictionary that needs parsing depending on client version.
-        # Let's assume it returns the string directly for now.
-        editorial_content = response.strip()

-
-
-

         return editorial_content

     except Exception as e:
-        print(f"Error during
-        return f"An error occurred during editorial generation: {e}"
-

 # --- Gradio Interface Setup ---
-# Adapted from your original generated chat interface
 demo = gr.Interface(
-    fn=
     inputs=[
         gr.Textbox(lines=10, label="Problem Statement", placeholder="Paste your problem statement here...", autofocus=True),
-        gr.Slider(minimum=1, maximum=1024, value=400, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     outputs=gr.Textbox(label="Generated Editorial"),
-    title="Codeforces Editorial Assistant (
-    description="
-
     examples=[
-        [
     ]
 )

-
 if __name__ == "__main__":
     demo.launch()
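The removed version above called the hosted Inference API through huggingface_hub instead of loading weights into the Space. Several of its lines are truncated in this diff view, but the pattern it followed is roughly the sketch below; the parameter values mirror the Gradio defaults, and the exact removed lines (function name, title, description, example row) are not recoverable here, so treat this as an illustration rather than the removed code itself.

    # Rough sketch of the removed Inference API approach (illustrative, not the exact removed code).
    from huggingface_hub import InferenceClient

    client = InferenceClient("neuralnets/cf_codebot")

    def generate_editorial_via_api(problem_statement: str) -> str:
        # Sends the prompt to the hosted model and returns the generated text.
        return client.text_generation(
            problem_statement,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
        )

The added version below drops that client and loads the model directly inside the Space with transformers.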
+# app.py (Revisit this version from previous long answer)
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch

 # --- Configuration ---
+MODEL_NAME = "neuralnets/cf_codebot"

+# --- Model Loading ---
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+    model.eval()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    print(f"Model loaded on: {device}")
+
+except Exception as e:
+    print(f"Error loading model '{MODEL_NAME}': {e}")
+    print("Using a dummy function for demonstration purposes.")
+    tokenizer, model, device = None, None, "cpu"

 # --- Inference Function ---
+def generate_editorial(problem_statement: str, max_new_tokens: int, temperature: float, top_p: float) -> str:
+    if model is None:
+        return "Model not loaded, using dummy generation. (Check logs)"

     try:
+        input_text = problem_statement
+
+        inputs = tokenizer(
+            input_text,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=512
+        ).to(device)
+
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            top_k=50,
             top_p=top_p,
+            temperature=temperature,
+            stop_sequences=["<end_of_turn>"] # Can use this, or `stop` if transformers is very new
         )

+        generated_sequence = tokenizer.decode(outputs[0], skip_special_tokens=False)
+
+        if generated_sequence.startswith(input_text):
+            editorial_content = generated_sequence[len(input_text):].strip()
+            editorial_content = editorial_content.replace("<end_of_turn>", "").strip()
+        else:
+            editorial_content = generated_sequence.strip()
+            editorial_content = editorial_content.replace("<end_of_turn>", "").strip()

         return editorial_content

     except Exception as e:
+        print(f"Error during inference: {e}")
+        return f"An error occurred during editorial generation: {e}"

 # --- Gradio Interface Setup ---
 demo = gr.Interface(
+    fn=generate_editorial,
     inputs=[
         gr.Textbox(lines=10, label="Problem Statement", placeholder="Paste your problem statement here...", autofocus=True),
+        gr.Slider(minimum=1, maximum=1024, value=400, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     outputs=gr.Textbox(label="Generated Editorial"),
+    title="Codeforces Editorial Assistant (Model Loaded In-Space)",
+    description="Paste a Codeforces problem statement and get a generated editorial from neuralnets/cf_codebot.",
+    flagging_mode="auto", # Updated from allow_flagging
     examples=[
+        [
+            "A. Watermelon\ntime limit per test\n1 second\nmemory limit per test\n64 megabytes\n\nOne hot summer day Pete and his friend Billy decided to buy a watermelon. They chose the biggest and the ripest one, in their opinion. After that the watermelon was weighed, and the scales showed w kilos. They rushed home, dying of thirst, and decided to divide the berry, however they faced a hard problem.\n\nPete and Billy are great fans of even numbers, that's why they want to divide the watermelon in such a way that each of the two parts weighs even number of kilos, at the same time it is not obligatory that the parts are equal. The boys are extremely tired and want to start their meal as soon as possible, that's why you should help them and find out, if they can divide the watermelon in the way they want. For sure, each of them should get a part of positive weight.\nInput\n\nThe first (and the only) input line contains integer number w (1 ≤ w ≤ 100) — the weight of the watermelon bought by the boys.\nOutput\n\nPrint YES, if the boys can divide the watermelon into two parts, each of them weighing even number of kilos; and NO in the opposite case.",
+            400,
+            0.7,
+            0.95
+        ]
     ]
 )

 if __name__ == "__main__":
     demo.launch()
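One caveat in the added version: `stop_sequences` is an argument of the Inference API client, not of transformers' `model.generate()`, which validates its keyword arguments and will typically reject the unknown kwarg; inside the function's try/except that would surface as the returned error string rather than an editorial. A minimal workaround sketch for that part of `generate_editorial` (variable names as in the file above) is to drop the kwarg and trim at the marker after decoding:

    # Workaround sketch: generate without stop_sequences, then cut at "<end_of_turn>" ourselves.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=50,
        top_p=top_p,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    generated_sequence = tokenizer.decode(outputs[0], skip_special_tokens=False)
    # Keep only the text before the first end-of-turn marker, if the model emits one.
    generated_sequence = generated_sequence.split("<end_of_turn>", 1)[0]

Recent transformers releases also accept `stop_strings=["<end_of_turn>"]` together with `tokenizer=tokenizer` in `generate()`, which achieves the same effect natively; verify against the installed version before relying on it.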
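Before chasing the Space's build error, the inference function can also be exercised outside the Gradio UI. A minimal sketch, assuming the new file is saved as app.py and that transformers and torch are installed locally (on a Gradio Space they would typically need to be listed in requirements.txt):

    # sanity_check.py -- hypothetical helper script, not part of the Space.
    from app import generate_editorial  # importing app.py also runs the model-loading block

    problem = (
        "A. Watermelon\n"
        "Pete and Billy want to split a watermelon of weight w (1 <= w <= 100) into two parts,\n"
        "each of positive even weight. Print YES if this is possible, otherwise NO."
    )

    print(generate_editorial(problem, max_new_tokens=100, temperature=0.7, top_p=0.95))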