Spaces:
Build error
Update app.py
app.py
CHANGED
@@ -1,82 +1,90 @@
-# app.py
 import gradio as gr
-from huggingface_hub import InferenceClient

 # --- Configuration ---
-
-# The Inference API URL for a model is typically 'https://api-inference.huggingface.co/models/{model_id}'
-# You can also pass just the model_id if it's on the public API:
-CLIENT_MODEL_ID = "neuralnets/cf_codebot"
-client = InferenceClient(CLIENT_MODEL_ID)


 # --- Inference Function ---
-
-
-
-    max_tokens: int, # Use max_new_tokens for the actual generation length
-    temperature: float,
-    top_p: float,
-):
-    # The InferenceClient for text generation usually expects a direct string input
-    # and not necessarily the chat format (messages list) unless it's a specific chat model.
-    # For a text generation model like cf_codebot (which is GPT-2 based),
-    # you typically just send the input text.

-    # You might want to add a prompt structure here if your friend's model
-    # was fine-tuned with one, e.g., "Problem: {problem_statement}\nEditorial: "
-    prompt = problem_statement
-
-    # Call the Inference API for text generation
-    # The parameters might vary slightly depending on the specific model type
-    # but these are common for text generation.
-    # We use stream=False for now to get the full response at once for simplicity,
-    # as the model isn't designed for a chat interface, but rather a single generation.
     try:
-
-
-
-
             top_p=top_p,
-
-            #
-            stop_sequences=["<end_of_turn>"] # Add this if your friend's model reliably uses it
         )
-        # The response from text_generation is usually the generated string directly
-        # or a dictionary that needs parsing depending on client version.
-        # Let's assume it returns the string directly for now.
-        editorial_content = response.strip()

-
-
-

         return editorial_content

     except Exception as e:
-        print(f"Error during
-        return f"An error occurred during editorial generation: {e}"
-

 # --- Gradio Interface Setup ---
-# Adapted from your original generated chat interface
 demo = gr.Interface(
-    fn=
     inputs=[
         gr.Textbox(lines=10, label="Problem Statement", placeholder="Paste your problem statement here...", autofocus=True),
-        gr.Slider(minimum=1, maximum=1024, value=400, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     outputs=gr.Textbox(label="Generated Editorial"),
-    title="Codeforces Editorial Assistant (
-    description="
-
     examples=[
-        [
     ]
 )

-
 if __name__ == "__main__":
     demo.launch()
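The removed version above called the hosted Inference API through huggingface_hub instead of loading weights into the Space. Several of its lines are truncated in this diff view, but the pattern it followed is roughly the sketch below; the parameter values mirror the Gradio defaults, and the exact removed lines (function name, title, description, example row) are not recoverable here, so treat this as an illustration rather than the removed code itself.

    # Rough sketch of the removed Inference API approach (illustrative, not the exact removed code).
    from huggingface_hub import InferenceClient

    client = InferenceClient("neuralnets/cf_codebot")

    def generate_editorial_via_api(problem_statement: str) -> str:
        # Sends the prompt to the hosted model and returns the generated text.
        return client.text_generation(
            problem_statement,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
        )

The added version below drops that client and loads the model directly inside the Space with transformers.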
+# app.py (Revisit this version from previous long answer)
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch

 # --- Configuration ---
+MODEL_NAME = "neuralnets/cf_codebot"

+# --- Model Loading ---
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+    model.eval()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    print(f"Model loaded on: {device}")
+
+except Exception as e:
+    print(f"Error loading model '{MODEL_NAME}': {e}")
+    print("Using a dummy function for demonstration purposes.")
+    tokenizer, model, device = None, None, "cpu"

 # --- Inference Function ---
+def generate_editorial(problem_statement: str, max_new_tokens: int, temperature: float, top_p: float) -> str:
+    if model is None:
+        return "Model not loaded, using dummy generation. (Check logs)"

     try:
+        input_text = problem_statement
+
+        inputs = tokenizer(
+            input_text,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=512
+        ).to(device)
+
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            top_k=50,
             top_p=top_p,
+            temperature=temperature,
+            stop_sequences=["<end_of_turn>"] # Can use this, or `stop` if transformers is very new
         )

+        generated_sequence = tokenizer.decode(outputs[0], skip_special_tokens=False)
+
+        if generated_sequence.startswith(input_text):
+            editorial_content = generated_sequence[len(input_text):].strip()
+            editorial_content = editorial_content.replace("<end_of_turn>", "").strip()
+        else:
+            editorial_content = generated_sequence.strip()
+            editorial_content = editorial_content.replace("<end_of_turn>", "").strip()

         return editorial_content

     except Exception as e:
+        print(f"Error during inference: {e}")
+        return f"An error occurred during editorial generation: {e}"

 # --- Gradio Interface Setup ---
 demo = gr.Interface(
+    fn=generate_editorial,
     inputs=[
         gr.Textbox(lines=10, label="Problem Statement", placeholder="Paste your problem statement here...", autofocus=True),
+        gr.Slider(minimum=1, maximum=1024, value=400, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     outputs=gr.Textbox(label="Generated Editorial"),
+    title="Codeforces Editorial Assistant (Model Loaded In-Space)",
+    description="Paste a Codeforces problem statement and get a generated editorial from neuralnets/cf_codebot.",
+    flagging_mode="auto", # Updated from allow_flagging
     examples=[
+        [
+            "A. Watermelon\ntime limit per test\n1 second\nmemory limit per test\n64 megabytes\n\nOne hot summer day Pete and his friend Billy decided to buy a watermelon. They chose the biggest and the ripest one, in their opinion. After that the watermelon was weighed, and the scales showed w kilos. They rushed home, dying of thirst, and decided to divide the berry, however they faced a hard problem.\n\nPete and Billy are great fans of even numbers, that's why they want to divide the watermelon in such a way that each of the two parts weighs even number of kilos, at the same time it is not obligatory that the parts are equal. The boys are extremely tired and want to start their meal as soon as possible, that's why you should help them and find out, if they can divide the watermelon in the way they want. For sure, each of them should get a part of positive weight.\nInput\n\nThe first (and the only) input line contains integer number w (1 ≤ w ≤ 100) — the weight of the watermelon bought by the boys.\nOutput\n\nPrint YES, if the boys can divide the watermelon into two parts, each of them weighing even number of kilos; and NO in the opposite case.",
+            400,
+            0.7,
+            0.95
+        ]
     ]
 )

 if __name__ == "__main__":
     demo.launch()
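One caveat in the added version: `stop_sequences` is an argument of the Inference API client, not of transformers' `model.generate()`, which validates its keyword arguments and will typically reject the unknown kwarg; inside the function's try/except that would surface as the returned error string rather than an editorial. A minimal workaround sketch for that part of `generate_editorial` (variable names as in the file above) is to drop the kwarg and trim at the marker after decoding:

    # Workaround sketch: generate without stop_sequences, then cut at "<end_of_turn>" ourselves.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=50,
        top_p=top_p,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    generated_sequence = tokenizer.decode(outputs[0], skip_special_tokens=False)
    # Keep only the text before the first end-of-turn marker, if the model emits one.
    generated_sequence = generated_sequence.split("<end_of_turn>", 1)[0]

Recent transformers releases also accept `stop_strings=["<end_of_turn>"]` together with `tokenizer=tokenizer` in `generate()`, which achieves the same effect natively; verify against the installed version before relying on it.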
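Before chasing the Space's build error, the inference function can also be exercised outside the Gradio UI. A minimal sketch, assuming the new file is saved as app.py and that transformers and torch are installed locally (on a Gradio Space they would typically need to be listed in requirements.txt):

    # sanity_check.py -- hypothetical helper script, not part of the Space.
    from app import generate_editorial  # importing app.py also runs the model-loading block

    problem = (
        "A. Watermelon\n"
        "Pete and Billy want to split a watermelon of weight w (1 <= w <= 100) into two parts,\n"
        "each of positive even weight. Print YES if this is possible, otherwise NO."
    )

    print(generate_editorial(problem, max_new_tokens=100, temperature=0.7, top_p=0.95))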