Spaces:

Wedyan2023
/

Data_Generation_LabelingCopy

Sleeping

App Files Files

Wedyan2023 committed on Apr 30

Commit

b51ed4e

verified ·

1 Parent(s): 0b8c1dd

Update app104.py

Browse files

Files changed (1) hide show

app104.py +130 -60

app104.py CHANGED Viewed

@@ -31,73 +31,73 @@ client = OpenAI(
 # from transformers import AutoModelForCausalLM, AutoTokenizer
 # import torch
-# Model selection dropdown
-selected_model = st.selectbox(
-    "Select Model",
-    ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-     "meta-llama/Llama-3.3-70B-Instruct",
-     "meta-llama/Llama-3.2-3B-Instruct",
-     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-     "meta-llama/Meta-Llama-3-8B-Instruct",
-     "meta-llama/Llama-3.1-70B-Instruct"],
-    key='model_select'
-)
-@st.cache_resource  # Cache the model to prevent reloading
-def load_model(model_name):
-    try:
-        # Optimized model loading configuration
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float16,        # Use half precision
-            device_map="auto",                # Automatic device mapping
-            load_in_8bit=True,               # Enable 8-bit quantization
-            low_cpu_mem_usage=True,          # Optimize CPU memory usage
-            max_memory={0: "10GB"}           # Limit GPU memory usage
-        )
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            padding_side="left",
-            truncation_side="left"
-        )
-        return model, tokenizer
-    except Exception as e:
-        st.error(f"Error loading model: {str(e)}")
-        return None, None
-# Load the selected model with optimizations
-if selected_model:
-    model, tokenizer = load_model(selected_model)
-    # Check if model loaded successfully
-    if model is not None:
-        st.success(f"Successfully loaded {selected_model}")
-    else:
-        st.warning("Please select a different model or check your hardware capabilities")
-# Function to generate text
-def generate_response(prompt, model, tokenizer):
-    try:
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-        with torch.no_grad():
-            outputs = model.generate(
-                inputs["input_ids"],
-                max_length=256,
-                num_return_sequences=1,
-                temperature=0.7,
-                do_sample=True,
-                pad_token_id=tokenizer.pad_token_id
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response
-    except Exception as e:
-        return f"Error generating response: {str(e)}"
 ############################################################
 ####new
@@ -251,7 +251,77 @@ with st.sidebar:
     #     key='model_select'
     # )
 # model = AutoModelForCausalLM.from_pretrained(
 #     "meta-llama/Meta-Llama-3-8B-Instruct",

 # from transformers import AutoModelForCausalLM, AutoTokenizer
 # import torch
+# # Model selection dropdown
+# selected_model = st.selectbox(
+#     "Select Model",
+#     ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+#      "meta-llama/Llama-3.3-70B-Instruct",
+#      "meta-llama/Llama-3.2-3B-Instruct",
+#      "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+#      "meta-llama/Meta-Llama-3-8B-Instruct",
+#      "meta-llama/Llama-3.1-70B-Instruct"],
+#     key='model_select'
+# )
+# @st.cache_resource  # Cache the model to prevent reloading
+# def load_model(model_name):
+#     try:
+#         # Optimized model loading configuration
+#         model = AutoModelForCausalLM.from_pretrained(
+#             model_name,
+#             torch_dtype=torch.float16,        # Use half precision
+#             device_map="auto",                # Automatic device mapping
+#             load_in_8bit=True,               # Enable 8-bit quantization
+#             low_cpu_mem_usage=True,          # Optimize CPU memory usage
+#             max_memory={0: "10GB"}           # Limit GPU memory usage
+#         )
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             model_name,
+#             padding_side="left",
+#             truncation_side="left"
+#         )
+#         return model, tokenizer
+#     except Exception as e:
+#         st.error(f"Error loading model: {str(e)}")
+#         return None, None
+# # Load the selected model with optimizations
+# if selected_model:
+#     model, tokenizer = load_model(selected_model)
+#     # Check if model loaded successfully
+#     if model is not None:
+#         st.success(f"Successfully loaded {selected_model}")
+#     else:
+#         st.warning("Please select a different model or check your hardware capabilities")
+# # Function to generate text
+# def generate_response(prompt, model, tokenizer):
+#     try:
+#         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+#         with torch.no_grad():
+#             outputs = model.generate(
+#                 inputs["input_ids"],
+#                 max_length=256,
+#                 num_return_sequences=1,
+#                 temperature=0.7,
+#                 do_sample=True,
+#                 pad_token_id=tokenizer.pad_token_id
+#             )
+#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+#         return response
+#     except Exception as e:
+#         return f"Error generating response: {str(e)}"
 ############################################################
 ####new
     #     key='model_select'
     # )
+#################new oooo
+# Model selection dropdown
+selected_model = st.selectbox(
+    "Select Model",
+    [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+     "meta-llama/Llama-3.2-3B-Instruct",
+     "meta-llama/Llama-3.3-70B-Instruct",
+     "meta-llama/Llama-3.2-3B-Instruct",
+     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+     "meta-llama/Meta-Llama-3-8B-Instruct",
+     "meta-llama/Llama-3.1-70B-Instruct"],
+    key='model_select'
+)
+@st.cache_resource  # Cache the model to prevent reloading
+def load_model(model_name):
+    try:
+        # Optimized model loading configuration
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,        # Use half precision
+            device_map="auto",                # Automatic device mapping
+            load_in_8bit=True,               # Enable 8-bit quantization
+            low_cpu_mem_usage=True,          # Optimize CPU memory usage
+            max_memory={0: "10GB"}           # Limit GPU memory usage
+        )
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            padding_side="left",
+            truncation_side="left"
+        )
+        return model, tokenizer
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        return None, None
+# Load the selected model with optimizations
+if selected_model:
+    model, tokenizer = load_model(selected_model)
+    # Check if model loaded successfully
+    if model is not None:
+        st.success(f"Successfully loaded {selected_model}")
+    else:
+        st.warning("Please select a different model or check your hardware capabilities")
+# Function to generate text
+def generate_response(prompt, model, tokenizer):
+    try:
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs["input_ids"],
+                max_length=256,
+                num_return_sequences=1,
+                temperature=0.7,
+                do_sample=True,
+                pad_token_id=tokenizer.pad_token_id
+            )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        return f"Error generating response: {str(e)}"
+################
 # model = AutoModelForCausalLM.from_pretrained(
 #     "meta-llama/Meta-Llama-3-8B-Instruct",