Spaces:

PhilippSpohn
/

tokenprob

Running

App Files Community

PhilippSpohn committed on Feb 5

Commit

3bd5de9

1 Parent(s): b9b96cc

Initial commit for HF Spaces deployment

Browse files

Files changed (6) hide show

Dockerfile +19 -0
README.md +17 -23
app.py +1 -1
static/script.js +131 -114
static/style.css +39 -0
templates/index.html +4 -1

Dockerfile ADDED Viewed

	@@ -0,0 +1,19 @@

+FROM python:3.9-slim
+# Create a non-root user
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Make port 7860 available (required for HF Spaces)
+EXPOSE 7860
+# Start the app
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,35 +1,29 @@
 # Token Probability Analyzer
-A web application that analyzes token probabilities using various language models. This tool helps visualize and understand how language models predict tokens in a given text sequence.
 ## Features
-- Support for multiple language models (GPT-2, TinyLlama, etc.)
-- Token-by-token probability analysis
-- Percentile scoring for token probabilities
-- Top-k predictions for each position
-- Joint and average log likelihood calculations
-## Setup
-1. Install the required dependencies:
-```bash
-pip install -r requirements.txt
-```
-2. Run the application:
-```bash
-python app.py
-```
-3. Open your browser and navigate to `http://localhost:5000`
 ## Usage
-1. Select a language model from the dropdown menu
 2. Enter your text in the input field
-3. Click "Analyze" to see the token probabilities and predictions
 ## Technical Details
-The application uses Flask for the backend and provides a simple web interface. It leverages the Hugging Face Transformers library to load and run various language models for token probability analysis.

 # Token Probability Analyzer
+This web application allows you to analyze token probabilities and predictions from various language models. It provides insights into how likely each token is according to the model, along with top predictions at each position.
 ## Features
+- Analyze text using different language models (GPT-2, TinyLlama, etc.)
+- View token-by-token probabilities
+- See percentile scores for each token
+- Explore top-k predictions at each position
+- Calculate joint and average log-likelihood
 ## Usage
+1. Select a model from the dropdown menu
 2. Enter your text in the input field
+3. Click "Analyze" to see the results
 ## Technical Details
+Built with:
+- Flask
+- Hugging Face Transformers
+- PyTorch
+- JavaScript for interactive visualizations
+## Deployment
+This app is deployed on Hugging Face Spaces using Docker.

app.py CHANGED Viewed

@@ -95,4 +95,4 @@ def analyze():
     })
 if __name__ == "__main__":
-    app.run(debug=True)

     })
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)

static/script.js CHANGED Viewed

@@ -1,131 +1,148 @@
 document.getElementById("analyze-button").addEventListener("click", async () => {
     const text = document.getElementById("input-text").value;
     const model = document.getElementById("model-select").value;
-    const response = await fetch("/analyze", {
-        method: "POST",
-        headers: {
-            "Content-Type": "application/json"
-        },
-        body: JSON.stringify({ text, model })
-    });
-    const data = await response.json();
-    const coloredTextDiv = document.getElementById("colored-text");
-    coloredTextDiv.innerHTML = "";
-    // Always add the first token
-    const firstToken = data.tokens[0];
-    const firstTokenSpan = document.createElement("span");
-    firstTokenSpan.classList.add("token");
-    // Handle special tokens and regular tokens differently
-    if (firstToken === "<s>" || firstToken === "<|endoftext|>") {
-        firstTokenSpan.style.backgroundColor = "#808080"; // Gray for special tokens
-        firstTokenSpan.textContent = "■";
-        tippy(firstTokenSpan, {
-            content: "<div><strong>Beginning of Sequence</strong></div>",
-            allowHTML: true,
-            theme: 'custom',
-            placement: 'top',
-            interactive: true
-        });
-    } else {
-        // Handle regular first token
-        firstTokenSpan.style.backgroundColor = "#808080"; // or any other color you prefer
-        firstTokenSpan.textContent = firstToken;
-        tippy(firstTokenSpan, {
-            content: `<div><strong>First Token</strong></div>`,
-            allowHTML: true,
-            theme: 'custom',
-            placement: 'top',
-            interactive: true
-        });
-    }
-    coloredTextDiv.appendChild(firstTokenSpan);
-    for (let index = 0; index < data.log_probs.length; index++) {
-        const token = data.tokens[index + 1];
-        const percentile = data.percentiles[index];
-        const logProb = data.log_probs[index];
-        const topKPredictions = data.top_k_predictions[index];
-        const color = getColor(data.log_probs, logProb);
-        const tokenSpan = document.createElement("span");
-        tokenSpan.classList.add("token");
-        tokenSpan.style.backgroundColor = color;
-        let displayToken = token;
-        let specialTokenDescription = "";
-        // Enhanced special token handling
-        if (token === "<s>" || token === "<|endoftext|>") {
-            displayToken = "■";
-            specialTokenDescription = "Beginning of Sequence";
-        } else if (token === "</s>" || token === "<|endoftext|>") {
-            displayToken = "■";
-            specialTokenDescription = "End of Sequence";
-        } else if (token === "<0x0A>") {
-            displayToken = "■";
-            specialTokenDescription = "Newline";
-        } else if (token.startsWith("<") && token.endsWith(">")) {
-            displayToken = "■";
-            specialTokenDescription = "Special Token: " + token;
         } else {
-            // Clean up GPT-2 style tokens (Ġ and Ċ)
-            displayToken = displayToken
-                .replace(/\u2581/g, " ")  // Replace underscore token
-                .replace(/Ġ/g, " ")       // Replace GPT-2 space token
-                .replace(/Ċ/g, "\n");     // Replace GPT-2 newline token
         }
-        tokenSpan.textContent = displayToken;
-        let tooltipContent = "";
-        if (specialTokenDescription) {
-            tooltipContent += `<div style="font-weight: bold; margin-bottom: 8px;">${specialTokenDescription}</div>`;
-        }
-        tooltipContent += `<div style="font-weight: bold; margin-bottom: 4px;">Top 5 Predictions:</div>`;
-        topKPredictions.forEach(pred => {
-            let predToken = pred.token;
-            if (predToken === "<0x0A>") {
-                predToken = "\\n";
-            } else if (predToken.startsWith("<") && predToken.endsWith(">")) {
-                predToken = "[SPECIAL]";
-            } else {
-                predToken = predToken
-                    .replace(/\u2581/g, " ")
-                    .replace(/Ġ/g, " ")
-                    .replace(/Ċ/g, "\n");
             }
-            tooltipContent += `<div style="padding-left: 8px;">${predToken}: ${pred.log_prob.toFixed(4)}</div>`;
-        });
-        tooltipContent += `<div style="margin-top: 8px; border-top: 1px solid #555; padding-top: 8px;">
-            <div><strong>Stats:</strong></div>
-            <div style="padding-left: 8px;">Percentile: ${percentile.toFixed(2)}</div>
-            <div style="padding-left: 8px;">Log-Likelihood: ${logProb.toFixed(4)}</div>
-        </div>`;
-        tippy(tokenSpan, {
-            content: tooltipContent,
-            allowHTML: true,
-            theme: 'custom',
-            placement: 'top',
-            interactive: true
-        });
-        coloredTextDiv.appendChild(tokenSpan);
-        if (token === "<0x0A>") {
-          coloredTextDiv.appendChild(document.createElement("br"));
         }
-    }
-    document.getElementById("joint-log-likelihood").textContent = data.joint_log_likelihood.toFixed(4);
-    document.getElementById("average-log-likelihood").textContent = data.average_log_likelihood.toFixed(4);
 });
 function getColor(allLogProbs, currentLogProb) {

 document.getElementById("analyze-button").addEventListener("click", async () => {
     const text = document.getElementById("input-text").value;
     const model = document.getElementById("model-select").value;
+    // Show loading state
+    const analyzeButton = document.getElementById("analyze-button");
+    const buttonSpinner = analyzeButton.querySelector(".button-spinner");
+    analyzeButton.classList.add("loading");
+    buttonSpinner.classList.add("visible");
+    analyzeButton.disabled = true;
+    try {
+        const response = await fetch("/analyze", {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/json"
+            },
+            body: JSON.stringify({ text, model })
+        });
+        const data = await response.json();
+        const coloredTextDiv = document.getElementById("colored-text");
+        coloredTextDiv.innerHTML = "";
+        // Always add the first token
+        const firstToken = data.tokens[0];
+        const firstTokenSpan = document.createElement("span");
+        firstTokenSpan.classList.add("token");
+        // Handle special tokens and regular tokens differently
+        if (firstToken === "<s>" || firstToken === "<|endoftext|>") {
+            firstTokenSpan.style.backgroundColor = "#808080"; // Gray for special tokens
+            firstTokenSpan.textContent = "■";
+            tippy(firstTokenSpan, {
+                content: "<div><strong>Beginning of Sequence</strong></div>",
+                allowHTML: true,
+                theme: 'custom',
+                placement: 'top',
+                interactive: true
+            });
         } else {
+            // Handle regular first token
+            firstTokenSpan.style.backgroundColor = "#808080"; // or any other color you prefer
+            firstTokenSpan.textContent = firstToken;
+            tippy(firstTokenSpan, {
+                content: `<div><strong>First Token</strong></div>`,
+                allowHTML: true,
+                theme: 'custom',
+                placement: 'top',
+                interactive: true
+            });
         }
+        coloredTextDiv.appendChild(firstTokenSpan);
+        for (let index = 0; index < data.log_probs.length; index++) {
+            const token = data.tokens[index + 1];
+            const percentile = data.percentiles[index];
+            const logProb = data.log_probs[index];
+            const topKPredictions = data.top_k_predictions[index];
+            const color = getColor(data.log_probs, logProb);
+            const tokenSpan = document.createElement("span");
+            tokenSpan.classList.add("token");
+            tokenSpan.style.backgroundColor = color;
+            let displayToken = token;
+            let specialTokenDescription = "";
+            // Enhanced special token handling
+            if (token === "<s>" || token === "<|endoftext|>") {
+                displayToken = "■";
+                specialTokenDescription = "Beginning of Sequence";
+            } else if (token === "</s>" || token === "<|endoftext|>") {
+                displayToken = "■";
+                specialTokenDescription = "End of Sequence";
+            } else if (token === "<0x0A>") {
+                displayToken = "■";
+                specialTokenDescription = "Newline";
+            } else if (token.startsWith("<") && token.endsWith(">")) {
+                displayToken = "■";
+                specialTokenDescription = "Special Token: " + token;
+            } else {
+                // Clean up GPT-2 style tokens (Ġ and Ċ)
+                displayToken = displayToken
+                    .replace(/\u2581/g, " ")  // Replace underscore token
+                    .replace(/Ġ/g, " ")       // Replace GPT-2 space token
+                    .replace(/Ċ/g, "\n");     // Replace GPT-2 newline token
+            }
+            tokenSpan.textContent = displayToken;
+            let tooltipContent = "";
+            if (specialTokenDescription) {
+                tooltipContent += `<div style="font-weight: bold; margin-bottom: 8px;">${specialTokenDescription}</div>`;
             }
+            tooltipContent += `<div style="font-weight: bold; margin-bottom: 4px;">Top 5 Predictions:</div>`;
+            topKPredictions.forEach(pred => {
+                let predToken = pred.token;
+                if (predToken === "<0x0A>") {
+                    predToken = "\\n";
+                } else if (predToken.startsWith("<") && predToken.endsWith(">")) {
+                    predToken = "[SPECIAL]";
+                } else {
+                    predToken = predToken
+                        .replace(/\u2581/g, " ")
+                        .replace(/Ġ/g, " ")
+                        .replace(/Ċ/g, "\n");
+                }
+                tooltipContent += `<div style="padding-left: 8px;">${predToken}: ${pred.log_prob.toFixed(4)}</div>`;
+            });
+            tooltipContent += `<div style="margin-top: 8px; border-top: 1px solid #555; padding-top: 8px;">
+                <div><strong>Stats:</strong></div>
+                <div style="padding-left: 8px;">Percentile: ${percentile.toFixed(2)}</div>
+                <div style="padding-left: 8px;">Log-Likelihood: ${logProb.toFixed(4)}</div>
+            </div>`;
+            tippy(tokenSpan, {
+                content: tooltipContent,
+                allowHTML: true,
+                theme: 'custom',
+                placement: 'top',
+                interactive: true
+            });
+            coloredTextDiv.appendChild(tokenSpan);
+            if (token === "<0x0A>") {
+              coloredTextDiv.appendChild(document.createElement("br"));
+            }
         }
+        document.getElementById("joint-log-likelihood").textContent = data.joint_log_likelihood.toFixed(4);
+        document.getElementById("average-log-likelihood").textContent = data.average_log_likelihood.toFixed(4);
+    } catch (error) {
+        console.error("Error during analysis:", error);
+        alert("An error occurred during analysis. Please try again.");
+    } finally {
+        // Hide loading state
+        analyzeButton.classList.remove("loading");
+        buttonSpinner.classList.remove("visible");
+        analyzeButton.disabled = false;
+    }
 });
 function getColor(allLogProbs, currentLogProb) {

static/style.css CHANGED Viewed

@@ -95,6 +95,45 @@ textarea {
     font-weight: 500;
     cursor: pointer;
     transition: background-color 0.2s;
 }
 .primary-button:hover {

     font-weight: 500;
     cursor: pointer;
     transition: background-color 0.2s;
+    position: relative;
+    min-width: 100px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 0.5rem;
+}
+.primary-button:disabled {
+    background-color: var(--text-secondary);
+    cursor: not-allowed;
+}
+.primary-button .button-text {
+    transition: opacity 0.2s;
+}
+.primary-button.loading .button-text {
+    opacity: 0;
+}
+.button-spinner {
+    position: absolute;
+    width: 20px;
+    height: 20px;
+    border: 2px solid rgba(255, 255, 255, 0.3);
+    border-top: 2px solid white;
+    border-radius: 50%;
+    animation: spin 1s linear infinite;
+    display: none;
+}
+.button-spinner.visible {
+    display: block;
+}
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
 }
 .primary-button:hover {

templates/index.html CHANGED Viewed

@@ -30,7 +30,10 @@
                 <textarea id="input-text" placeholder="Enter your text here..."></textarea>
             </div>
-            <button id="analyze-button" class="primary-button">Analyze</button>
         </div>
         <div id="output" class="output-panel">

                 <textarea id="input-text" placeholder="Enter your text here..."></textarea>
             </div>
+            <button id="analyze-button" class="primary-button">
+                <span class="button-text">Analyze</span>
+                <div class="button-spinner hidden"></div>
+            </button>
         </div>
         <div id="output" class="output-panel">