Data_Generation_LabelingCopy

Sleeping

App Files Files

Wedyan2023 commited on May 14

Commit

89c24fe

verified ·

1 Parent(s): c2ef26c

Delete app8.py

Browse files

Files changed (1) hide show

app8.py +0 -409

app8.py DELETED Viewed

@@ -1,409 +0,0 @@
-import streamlit as st
-import pandas as pd
-import os
-from datetime import datetime
-import random
-from pathlib import Path
-from openai import OpenAI
-from dotenv import load_dotenv
-from langchain_core.prompts import PromptTemplate
-# Load environment variables
-load_dotenv()
-##openai_api_key = os.getenv("OPENAI_API_KEY")
-# Initialize the client
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1",
-    api_key=os.environ.get('TOKEN2')  # Add your Huggingface token here
-)
-# Initialize OpenAI client
-##client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
-# Custom CSS for better appearance
-st.markdown("""
-<style>
-    .stButton > button {
-        width: 100%;
-        margin-bottom: 10px;
-        background-color: #4CAF50;
-        color: white;
-        border: none;
-        padding: 10px;
-        border-radius: 5px;
-    }
-    .task-button {
-        background-color: #2196F3 !important;
-    }
-    .stSelectbox {
-        margin-bottom: 20px;
-    }
-    .output-container {
-        padding: 20px;
-        border-radius: 5px;
-        border: 1px solid #ddd;
-        margin: 10px 0;
-    }
-    .status-container {
-        padding: 10px;
-        border-radius: 5px;
-        margin: 10px 0;
-    }
-    .sidebar-info {
-        padding: 10px;
-        background-color: #f0f2f6;
-        border-radius: 5px;
-        margin: 10px 0;
-    }
-</style>
-""", unsafe_allow_html=True)
-# Create data directories if they don't exist
-if not os.path.exists('data'):
-    os.makedirs('data')
-def read_csv_with_encoding(file):
-    encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
-    for encoding in encodings:
-        try:
-            return pd.read_csv(file, encoding=encoding)
-        except UnicodeDecodeError:
-            continue
-    raise UnicodeDecodeError("Failed to read file with any supported encoding")
-def save_to_csv(data, filename):
-    df = pd.DataFrame(data)
-    df.to_csv(f'data/{filename}', index=False)
-    return df
-def load_from_csv(filename):
-    try:
-        return pd.read_csv(f'data/{filename}')
-    except:
-        return pd.DataFrame()
-# Define reset function
-def reset_conversation():
-    st.session_state.conversation = []
-    st.session_state.messages = []
-# Initialize session state
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-# Main app title
-st.title("🤖 Text Data Generation & Labeling App")
-# Sidebar settings
-with st.sidebar:
-    st.title("⚙️ Settings")
-    selected_model = st.selectbox(
-        "Select Model",
-        ["meta-llama/Meta-Llama-3-8B-Instruct"],
-        key='model_select'
-    )
-    temperature = st.slider(
-        "Temperature",
-        0.0, 1.0, 0.5,
-        help="Controls randomness in generation"
-    )
-    st.button("🔄 Reset Conversation", on_click=reset_conversation)
-    with st.container():
-        st.markdown("""
-        <div class="sidebar-info">
-            <h4>Current Model: {}</h4>
-            <p><em>Note: Generated content may be inaccurate or false.</em></p>
-        </div>
-        """.format(selected_model), unsafe_allow_html=True)
-# Main content
-col1, col2 = st.columns(2)
-with col1:
-    if st.button("📝 Data Generation", key="gen_button", help="Generate new data"):
-        st.session_state.task_choice = "Data Generation"
-with col2:
-    if st.button("🏷️ Data Labeling", key="label_button", help="Label existing data"):
-        st.session_state.task_choice = "Data Labeling"
-if "task_choice" in st.session_state:
-    if st.session_state.task_choice == "Data Generation":
-        st.header("📝 Data Generation")
-        classification_type = st.selectbox(
-            "Classification Type",
-            ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
-        )
-        if classification_type == "Sentiment Analysis":
-            labels = ["Positive", "Negative", "Neutral"]
-        elif classification_type == "Binary Classification":
-            col1, col2 = st.columns(2)
-            with col1:
-                label_1 = st.text_input("First class", "Positive")
-            with col2:
-                label_2 = st.text_input("Second class", "Negative")
-            labels = [label_1, label_2] if label_1 and label_2 else ["Positive", "Negative"]
-        else:
-            num_classes = st.slider("Number of classes", 3, 10, 3)
-            labels = []
-            cols = st.columns(3)
-            for i in range(num_classes):
-                with cols[i % 3]:
-                    label = st.text_input(f"Class {i+1}", f"Class_{i+1}")
-                    labels.append(label)
-        domain = st.selectbox("Domain", ["Restaurant reviews", "E-commerce reviews", "Custom"])
-        if domain == "Custom":
-            domain = st.text_input("Specify custom domain")
-        col1, col2 = st.columns(2)
-        with col1:
-            min_words = st.number_input("Min words", 10, 90, 20)
-        with col2:
-            max_words = st.number_input("Max words", min_words, 90, 50)
-        use_few_shot = st.toggle("Use few-shot examples")
-        few_shot_examples = []
-        if use_few_shot:
-            num_examples = st.slider("Number of few-shot examples", 1, 5, 1)
-            for i in range(num_examples):
-                with st.expander(f"Example {i+1}"):
-                    content = st.text_area(f"Content", key=f"few_shot_content_{i}")
-                    label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
-                    if content and label:
-                        few_shot_examples.append({"content": content, "label": label})
-        num_to_generate = st.number_input("Number of examples", 1, 100, 10)
-        user_prompt = st.text_area("Additional instructions (optional)")
-        # Updated prompt template with word length constraints
-        prompt_template = PromptTemplate(
-            input_variables=["classification_type", "domain", "num_examples", "min_words", "max_words", "labels", "user_prompt"],
-            template=(
-                "You are a professional {classification_type} expert tasked with generating examples for {domain}.\n"
-                "Use the following parameters:\n"
-                "- Generate exactly {num_examples} examples\n"
-                "- Each example MUST be between {min_words} and {max_words} words long\n"
-                "- Use these labels: {labels}\n"
-                "- Generate the examples in this format: 'Example text. Label: [label]'\n"
-                "- Do not include word counts or any additional information\n"
-                "Additional instructions: {user_prompt}\n\n"
-                "Generate numbered examples:"
-            )
-        )
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("🎯 Generate Examples"):
-                with st.spinner("Generating examples..."):
-                    system_prompt = prompt_template.format(
-                        classification_type=classification_type,
-                        domain=domain,
-                        num_examples=num_to_generate,
-                        min_words=min_words,
-                        max_words=max_words,
-                        labels=", ".join(labels),
-                        user_prompt=user_prompt
-                    )
-                    try:
-                        stream = client.chat.completions.create(
-                            model=selected_model,
-                            messages=[{"role": "system", "content": system_prompt}],
-                            temperature=temperature,
-                            stream=True,
-                            max_tokens=3000,
-                        )
-                        response = st.write_stream(stream)
-                        st.session_state.messages.append({"role": "assistant", "content": response})
-                    except Exception as e:
-                        st.error("An error occurred during generation.")
-                        st.error(f"Details: {e}")
-        with col2:
-            if st.button("🔄 Regenerate"):
-                st.session_state.messages = st.session_state.messages[:-1] if st.session_state.messages else []
-                with st.spinner("Regenerating examples..."):
-                    system_prompt = prompt_template.format(
-                        classification_type=classification_type,
-                        domain=domain,
-                        num_examples=num_to_generate,
-                        min_words=min_words,
-                        max_words=max_words,
-                        labels=", ".join(labels),
-                        user_prompt=user_prompt
-                    )
-                    try:
-                        stream = client.chat.completions.create(
-                            model=selected_model,
-                            messages=[{"role": "system", "content": system_prompt}],
-                            temperature=temperature,
-                            stream=True,
-                            max_tokens=3000,
-                        )
-                        response = st.write_stream(stream)
-                        st.session_state.messages.append({"role": "assistant", "content": response})
-                    except Exception as e:
-                        st.error("An error occurred during regeneration.")
-                        st.error(f"Details: {e}")
-    elif st.session_state.task_choice == "Data Labeling":
-        st.header("🏷️ Data Labeling")
-        classification_type = st.selectbox(
-            "Classification Type",
-            ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"],
-            key="label_class_type"
-        )
-        if classification_type == "Sentiment Analysis":
-            labels = ["Positive", "Negative", "Neutral"]
-        elif classification_type == "Binary Classification":
-            col1, col2 = st.columns(2)
-            with col1:
-                label_1 = st.text_input("First class", "Positive", key="label_first")
-            with col2:
-                label_2 = st.text_input("Second class", "Negative", key="label_second")
-            labels = [label_1, label_2] if label_1 and label_2 else ["Positive", "Negative"]
-        else:
-            num_classes = st.slider("Number of classes", 3, 10, 3, key="label_num_classes")
-            labels = []
-            cols = st.columns(3)
-            for i in range(num_classes):
-                with cols[i % 3]:
-                    label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
-                    labels.append(label)
-        use_few_shot = st.toggle("Use few-shot examples for labeling")
-        few_shot_examples = []
-        if use_few_shot:
-            num_few_shot = st.slider("Number of few-shot examples", 1, 5, 1)
-            for i in range(num_few_shot):
-                with st.expander(f"Few-shot Example {i+1}"):
-                    content = st.text_area(f"Content", key=f"label_few_shot_content_{i}")
-                    label = st.selectbox(f"Label", labels, key=f"label_few_shot_label_{i}")
-                    if content and label:
-                        few_shot_examples.append(f"{content}\nLabel: {label}")
-        num_examples = st.number_input("Number of examples to classify", 1, 100, 1)
-        examples_to_classify = []
-        if num_examples <= 20:
-            for i in range(num_examples):
-                example = st.text_area(f"Example {i+1}", key=f"example_{i}")
-                if example:
-                    examples_to_classify.append(example)
-        else:
-            examples_text = st.text_area(
-                "Enter examples (one per line)",
-                height=300,
-                help="Enter each example on a new line"
-            )
-            if examples_text:
-                examples_to_classify = [ex.strip() for ex in examples_text.split('\n') if ex.strip()]
-                if len(examples_to_classify) > num_examples:
-                    examples_to_classify = examples_to_classify[:num_examples]
-        user_prompt = st.text_area("Additional instructions (optional)", key="label_instructions")
-        # Updated prompt template for labeling
-        few_shot_text = "\n\n".join(few_shot_examples) if few_shot_examples else ""
-        examples_text = "\n".join(f"{i+1}. {ex}" for i, ex in enumerate(examples_to_classify))
-        label_prompt_template = PromptTemplate(
-    input_variables=["classification_type", "labels", "few_shot_examples", "examples", "user_prompt"],
-    template=(
-        "You are a professional {classification_type} expert. Classify the following examples using these labels: {labels}.\n"
-        "Instructions:\n"
-        "- Return ONLY the numbered example followed by its classification\n"
-        "- Use the format: 'Example text. Label: [label]'\n"
-        "- Do not provide explanations or justifications\n"
-        "{user_prompt}\n\n"
-        "Few-shot examples:\n{few_shot_examples}\n\n"
-        "Examples to classify:\n{examples}\n\n"
-        "Output:\n"
-    )
-)
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("🏷️ Label Data"):
-                if examples_to_classify:
-                    with st.spinner("Labeling data..."):
-                        system_prompt = label_prompt_template.format(
-                            classification_type=classification_type,
-                            labels=", ".join(labels),
-                            few_shot_examples=few_shot_text,
-                            examples=examples_text,
-                            user_prompt=user_prompt
-                        )
-                        try:
-                            stream = client.chat.completions.create(
-                                model=selected_model,
-                                messages=[{"role": "system", "content": system_prompt}],
-                                temperature=temperature,
-                                stream=True,
-                                max_tokens=3000,
-                            )
-                            response = st.write_stream(stream)
-                            st.session_state.messages.append({"role": "assistant", "content": response})
-                        except Exception as e:
-                            st.error("An error occurred during labeling.")
-                            st.error(f"Details: {e}")
-                else:
-                    st.warning("Please enter at least one example to classify.")
-        with col2:
-            if st.button("🔄 Relabel"):
-                if examples_to_classify:
-                    st.session_state.messages = st.session_state.messages[:-1] if st.session_state.messages else []
-                    with st.spinner("Relabeling data..."):
-                        system_prompt = label_prompt_template.format(
-                            classification_type=classification_type,
-                            labels=", ".join(labels),
-                            few_shot_examples=few_shot_text,
-                            examples=examples_text,
-                            user_prompt=user_prompt
-                        )
-                        try:
-                            stream = client.chat.completions.create(
-                                model=selected_model,
-                                messages=[{"role": "system", "content": system_prompt}],
-                                temperature=temperature,
-                                stream=True,
-                                max_tokens=3000,
-                            )
-                            response = st.write_stream(stream)
-                            st.session_state.messages.append({"role": "assistant", "content": response})
-                        except Exception as e:
-                            st.error("An error occurred during relabeling.")
-                            st.error(f"Details: {e}")
-                else:
-                    st.warning("Please enter at least one example to classify.")
-if st.session_state.messages:
-    st.markdown("### Output:")
-    for message in st.session_state.messages[-1:]:
-        st.markdown(message["content"])
-##if st.session_state.messages:
-    ##st.markdown("### Output:")
-    ##last_message = st.session_state.messages[-1]["content"]
-    # Find the position of "Output:" if it exists
-    ##output_start = last_message.find("Output:")
-    ##if output_start != -1:
-        # Display only the content after "Output:"
-        ##cleaned_output = last_message[output_start + 7:].strip()
-        ##st.markdown(cleaned_output)
-    ##else:
-        # If "Output:" is not found, display the content as is
-        ##st.markdown(last_message)