SameerJugno committed
Commit 54cf79d · verified · 1 Parent(s): af41bdf

Update app.py

Files changed (1)
  1. app.py +31 -3
app.py CHANGED
@@ -29,13 +29,41 @@
  # title="LLaMA 3 - Fine-tuned Model"
  # ).launch()

+ # Here I change the model name
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
+ # from peft import PeftModel
+ # import torch
+ # import gradio as gr
+
+ # # Load base model from HF Hub
+ # base_model_name = "distilgpt2"
+ # tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+ # # Load base model (set torch_dtype if needed)
+ # model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float16)
+
+ # # Load LoRA adapters from local files in Space
+ # adapter_path = "./"  # If adapter files are in root or specify folder name
+ # model = PeftModel.from_pretrained(model, adapter_path)
+
+ # model.eval()
+
+ # def predict(text):
+ #     inputs = tokenizer(text, return_tensors="pt").to("cpu")  # Use "cuda" if GPU available
+ #     outputs = model.generate(**inputs, max_new_tokens=70)
+ #     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # iface = gr.Interface(fn=predict, inputs="text", outputs="text", title="LoRA Model Demo")
+ # iface.launch()
+
+ # Here is the new code with intent to optimize
  from transformers import AutoTokenizer, AutoModelForCausalLM
  from peft import PeftModel
  import torch
  import gradio as gr

  # Load base model from HF Hub
- base_model_name = "distilgpt2"
+ base_model_name = "unsloth/Llama-3.2-1B"  # Use your model path or model name
  tokenizer = AutoTokenizer.from_pretrained(base_model_name)

  # Load base model (set torch_dtype if needed)
@@ -49,8 +77,8 @@ model.eval()

  def predict(text):
      inputs = tokenizer(text, return_tensors="pt").to("cpu")  # Use "cuda" if GPU available
-     outputs = model.generate(**inputs, max_new_tokens=70)
+     outputs = model.generate(**inputs, max_new_tokens=100)
      return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
+
  iface = gr.Interface(fn=predict, inputs="text", outputs="text", title="LoRA Model Demo")
  iface.launch()
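
For reference, here is a minimal sketch of how app.py reads after this commit, assembled from the added and unchanged lines above (the commented-out copy of the old distilgpt2 setup is left out for brevity). The model-loading and adapter-loading lines between the two hunks are outside the diff context, so they are taken from the commented-out copy of the script and should be treated as an assumption about the surrounding file rather than part of the change itself; CPU inference is assumed, as in the original comments.

  from transformers import AutoTokenizer, AutoModelForCausalLM
  from peft import PeftModel
  import torch
  import gradio as gr

  # Load base model from HF Hub
  base_model_name = "unsloth/Llama-3.2-1B"  # Use your model path or model name
  tokenizer = AutoTokenizer.from_pretrained(base_model_name)

  # Load base model (float16 as in the original; float32 may be safer on CPU-only hardware)
  model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float16)

  # Load LoRA adapters from local files in the Space
  adapter_path = "./"  # assumes the adapter files sit in the repo root
  model = PeftModel.from_pretrained(model, adapter_path)

  model.eval()

  def predict(text):
      # Tokenize the prompt and keep tensors on CPU; switch to "cuda" if a GPU is available
      inputs = tokenizer(text, return_tensors="pt").to("cpu")
      outputs = model.generate(**inputs, max_new_tokens=100)
      return tokenizer.decode(outputs[0], skip_special_tokens=True)

  iface = gr.Interface(fn=predict, inputs="text", outputs="text", title="LoRA Model Demo")
  iface.launch()

On a Space this file is executed automatically; locally it can be run with python app.py, after which Gradio prints a local URL where the demo is served.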