Spaces:
Running
Running
Commit
·
3bd5de9
1
Parent(s):
b9b96cc
Initial commit for HF Spaces deployment
Browse files
- Dockerfile +19 -0
- README.md +17 -23
- app.py +1 -1
- static/script.js +131 -114
- static/style.css +39 -0
- templates/index.html +4 -1
Dockerfile
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9-slim
|
2 |
+
|
3 |
+
# Create a non-root user
|
4 |
+
RUN useradd -m -u 1000 user
|
5 |
+
USER user
|
6 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
7 |
+
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
11 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
12 |
+
|
13 |
+
COPY --chown=user . /app
|
14 |
+
|
15 |
+
# Make port 7860 available (required for HF Spaces)
|
16 |
+
EXPOSE 7860
|
17 |
+
|
18 |
+
# Start the app
|
19 |
+
CMD ["python", "app.py"]
|
README.md
CHANGED
@@ -1,35 +1,29 @@
|
|
1 |
# Token Probability Analyzer
|
2 |
|
3 |
-
|
4 |
|
5 |
## Features
|
6 |
|
7 |
-
-
|
8 |
-
-
|
9 |
-
-
|
10 |
-
-
|
11 |
-
-
|
12 |
-
|
13 |
-
## Setup
|
14 |
-
|
15 |
-
1. Install the required dependencies:
|
16 |
-
```bash
|
17 |
-
pip install -r requirements.txt
|
18 |
-
```
|
19 |
-
|
20 |
-
2. Run the application:
|
21 |
-
```bash
|
22 |
-
python app.py
|
23 |
-
```
|
24 |
-
|
25 |
-
3. Open your browser and navigate to `http://localhost:5000`
|
26 |
|
27 |
## Usage
|
28 |
|
29 |
-
1. Select a
|
30 |
2. Enter your text in the input field
|
31 |
-
3. Click "Analyze" to see the
|
32 |
|
33 |
## Technical Details
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Token Probability Analyzer
|
2 |
|
3 |
+
This web application allows you to analyze token probabilities and predictions from various language models. It provides insights into how likely each token is according to the model, along with top predictions at each position.
|
4 |
|
5 |
## Features
|
6 |
|
7 |
+
- Analyze text using different language models (GPT-2, TinyLlama, etc.)
|
8 |
+
- View token-by-token probabilities
|
9 |
+
- See percentile scores for each token
|
10 |
+
- Explore top-k predictions at each position
|
11 |
+
- Calculate joint and average log-likelihood
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
## Usage
|
14 |
|
15 |
+
1. Select a model from the dropdown menu
|
16 |
2. Enter your text in the input field
|
17 |
+
3. Click "Analyze" to see the results
|
18 |
|
19 |
## Technical Details
|
20 |
|
21 |
+
Built with:
|
22 |
+
- Flask
|
23 |
+
- Hugging Face Transformers
|
24 |
+
- PyTorch
|
25 |
+
- JavaScript for interactive visualizations
|
26 |
+
|
27 |
+
## Deployment
|
28 |
+
|
29 |
+
This app is deployed on Hugging Face Spaces using Docker.
|
app.py
CHANGED
@@ -95,4 +95,4 @@ def analyze():
|
|
95 |
})
|
96 |
|
97 |
if __name__ == "__main__":
|
98 |
-
app.run(
|
|
|
95 |
})
|
96 |
|
97 |
if __name__ == "__main__":
|
98 |
+
app.run(host="0.0.0.0", port=7860)
|
static/script.js
CHANGED
@@ -1,131 +1,148 @@
|
|
1 |
document.getElementById("analyze-button").addEventListener("click", async () => {
|
2 |
const text = document.getElementById("input-text").value;
|
3 |
const model = document.getElementById("model-select").value;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
|
6 |
-
method: "POST",
|
7 |
-
headers: {
|
8 |
-
"Content-Type": "application/json"
|
9 |
-
},
|
10 |
-
body: JSON.stringify({ text, model })
|
11 |
-
});
|
12 |
-
|
13 |
-
const data = await response.json();
|
14 |
|
15 |
-
|
16 |
-
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
} else {
|
35 |
-
// Handle regular first token
|
36 |
-
firstTokenSpan.style.backgroundColor = "#808080"; // or any other color you prefer
|
37 |
-
firstTokenSpan.textContent = firstToken;
|
38 |
-
tippy(firstTokenSpan, {
|
39 |
-
content: `<div><strong>First Token</strong></div>`,
|
40 |
-
allowHTML: true,
|
41 |
-
theme: 'custom',
|
42 |
-
placement: 'top',
|
43 |
-
interactive: true
|
44 |
-
});
|
45 |
-
}
|
46 |
-
|
47 |
-
coloredTextDiv.appendChild(firstTokenSpan);
|
48 |
-
|
49 |
-
for (let index = 0; index < data.log_probs.length; index++) {
|
50 |
-
const token = data.tokens[index + 1];
|
51 |
-
const percentile = data.percentiles[index];
|
52 |
-
const logProb = data.log_probs[index];
|
53 |
-
const topKPredictions = data.top_k_predictions[index];
|
54 |
-
const color = getColor(data.log_probs, logProb);
|
55 |
-
|
56 |
-
const tokenSpan = document.createElement("span");
|
57 |
-
tokenSpan.classList.add("token");
|
58 |
-
tokenSpan.style.backgroundColor = color;
|
59 |
-
|
60 |
-
let displayToken = token;
|
61 |
-
let specialTokenDescription = "";
|
62 |
-
|
63 |
-
// Enhanced special token handling
|
64 |
-
if (token === "<s>" || token === "<|endoftext|>") {
|
65 |
-
displayToken = "■";
|
66 |
-
specialTokenDescription = "Beginning of Sequence";
|
67 |
-
} else if (token === "</s>" || token === "<|endoftext|>") {
|
68 |
-
displayToken = "■";
|
69 |
-
specialTokenDescription = "End of Sequence";
|
70 |
-
} else if (token === "<0x0A>") {
|
71 |
-
displayToken = "■";
|
72 |
-
specialTokenDescription = "Newline";
|
73 |
-
} else if (token.startsWith("<") && token.endsWith(">")) {
|
74 |
-
displayToken = "■";
|
75 |
-
specialTokenDescription = "Special Token: " + token;
|
76 |
} else {
|
77 |
-
//
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
82 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
let tooltipContent = "";
|
87 |
-
if (specialTokenDescription) {
|
88 |
-
tooltipContent += `<div style="font-weight: bold; margin-bottom: 8px;">${specialTokenDescription}</div>`;
|
89 |
-
}
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
if (predToken === "<0x0A>") {
|
95 |
-
predToken = "\\n";
|
96 |
-
} else if (predToken.startsWith("<") && predToken.endsWith(">")) {
|
97 |
-
predToken = "[SPECIAL]";
|
98 |
-
} else {
|
99 |
-
predToken = predToken
|
100 |
-
.replace(/\u2581/g, " ")
|
101 |
-
.replace(/Ġ/g, " ")
|
102 |
-
.replace(/Ċ/g, "\n");
|
103 |
}
|
104 |
-
tooltipContent += `<div style="padding-left: 8px;">${predToken}: ${pred.log_prob.toFixed(4)}</div>`;
|
105 |
-
});
|
106 |
-
|
107 |
-
tooltipContent += `<div style="margin-top: 8px; border-top: 1px solid #555; padding-top: 8px;">
|
108 |
-
<div><strong>Stats:</strong></div>
|
109 |
-
<div style="padding-left: 8px;">Percentile: ${percentile.toFixed(2)}</div>
|
110 |
-
<div style="padding-left: 8px;">Log-Likelihood: ${logProb.toFixed(4)}</div>
|
111 |
-
</div>`;
|
112 |
-
|
113 |
-
tippy(tokenSpan, {
|
114 |
-
content: tooltipContent,
|
115 |
-
allowHTML: true,
|
116 |
-
theme: 'custom',
|
117 |
-
placement: 'top',
|
118 |
-
interactive: true
|
119 |
-
});
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
}
|
125 |
-
}
|
126 |
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
});
|
130 |
|
131 |
function getColor(allLogProbs, currentLogProb) {
|
|
|
1 |
document.getElementById("analyze-button").addEventListener("click", async () => {
|
2 |
const text = document.getElementById("input-text").value;
|
3 |
const model = document.getElementById("model-select").value;
|
4 |
+
|
5 |
+
// Show loading state
|
6 |
+
const analyzeButton = document.getElementById("analyze-button");
|
7 |
+
const buttonSpinner = analyzeButton.querySelector(".button-spinner");
|
8 |
+
analyzeButton.classList.add("loading");
|
9 |
+
buttonSpinner.classList.add("visible");
|
10 |
+
analyzeButton.disabled = true;
|
11 |
+
|
12 |
+
try {
|
13 |
+
const response = await fetch("/analyze", {
|
14 |
+
method: "POST",
|
15 |
+
headers: {
|
16 |
+
"Content-Type": "application/json"
|
17 |
+
},
|
18 |
+
body: JSON.stringify({ text, model })
|
19 |
+
});
|
20 |
|
21 |
+
const data = await response.json();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
+
const coloredTextDiv = document.getElementById("colored-text");
|
24 |
+
coloredTextDiv.innerHTML = "";
|
25 |
|
26 |
+
// Always add the first token
|
27 |
+
const firstToken = data.tokens[0];
|
28 |
+
const firstTokenSpan = document.createElement("span");
|
29 |
+
firstTokenSpan.classList.add("token");
|
30 |
+
|
31 |
+
// Handle special tokens and regular tokens differently
|
32 |
+
if (firstToken === "<s>" || firstToken === "<|endoftext|>") {
|
33 |
+
firstTokenSpan.style.backgroundColor = "#808080"; // Gray for special tokens
|
34 |
+
firstTokenSpan.textContent = "■";
|
35 |
+
tippy(firstTokenSpan, {
|
36 |
+
content: "<div><strong>Beginning of Sequence</strong></div>",
|
37 |
+
allowHTML: true,
|
38 |
+
theme: 'custom',
|
39 |
+
placement: 'top',
|
40 |
+
interactive: true
|
41 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
} else {
|
43 |
+
// Handle regular first token
|
44 |
+
firstTokenSpan.style.backgroundColor = "#808080"; // or any other color you prefer
|
45 |
+
firstTokenSpan.textContent = firstToken;
|
46 |
+
tippy(firstTokenSpan, {
|
47 |
+
content: `<div><strong>First Token</strong></div>`,
|
48 |
+
allowHTML: true,
|
49 |
+
theme: 'custom',
|
50 |
+
placement: 'top',
|
51 |
+
interactive: true
|
52 |
+
});
|
53 |
}
|
54 |
+
|
55 |
+
coloredTextDiv.appendChild(firstTokenSpan);
|
56 |
+
|
57 |
+
for (let index = 0; index < data.log_probs.length; index++) {
|
58 |
+
const token = data.tokens[index + 1];
|
59 |
+
const percentile = data.percentiles[index];
|
60 |
+
const logProb = data.log_probs[index];
|
61 |
+
const topKPredictions = data.top_k_predictions[index];
|
62 |
+
const color = getColor(data.log_probs, logProb);
|
63 |
+
|
64 |
+
const tokenSpan = document.createElement("span");
|
65 |
+
tokenSpan.classList.add("token");
|
66 |
+
tokenSpan.style.backgroundColor = color;
|
67 |
+
|
68 |
+
let displayToken = token;
|
69 |
+
let specialTokenDescription = "";
|
70 |
+
|
71 |
+
// Enhanced special token handling
|
72 |
+
if (token === "<s>" || token === "<|endoftext|>") {
|
73 |
+
displayToken = "■";
|
74 |
+
specialTokenDescription = "Beginning of Sequence";
|
75 |
+
} else if (token === "</s>" || token === "<|endoftext|>") {
|
76 |
+
displayToken = "■";
|
77 |
+
specialTokenDescription = "End of Sequence";
|
78 |
+
} else if (token === "<0x0A>") {
|
79 |
+
displayToken = "■";
|
80 |
+
specialTokenDescription = "Newline";
|
81 |
+
} else if (token.startsWith("<") && token.endsWith(">")) {
|
82 |
+
displayToken = "■";
|
83 |
+
specialTokenDescription = "Special Token: " + token;
|
84 |
+
} else {
|
85 |
+
// Clean up GPT-2 style tokens (Ġ and Ċ)
|
86 |
+
displayToken = displayToken
|
87 |
+
.replace(/\u2581/g, " ") // Replace underscore token
|
88 |
+
.replace(/Ġ/g, " ") // Replace GPT-2 space token
|
89 |
+
.replace(/Ċ/g, "\n"); // Replace GPT-2 newline token
|
90 |
+
}
|
91 |
|
92 |
+
tokenSpan.textContent = displayToken;
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
+
let tooltipContent = "";
|
95 |
+
if (specialTokenDescription) {
|
96 |
+
tooltipContent += `<div style="font-weight: bold; margin-bottom: 8px;">${specialTokenDescription}</div>`;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
+
tooltipContent += `<div style="font-weight: bold; margin-bottom: 4px;">Top 5 Predictions:</div>`;
|
100 |
+
topKPredictions.forEach(pred => {
|
101 |
+
let predToken = pred.token;
|
102 |
+
if (predToken === "<0x0A>") {
|
103 |
+
predToken = "\\n";
|
104 |
+
} else if (predToken.startsWith("<") && predToken.endsWith(">")) {
|
105 |
+
predToken = "[SPECIAL]";
|
106 |
+
} else {
|
107 |
+
predToken = predToken
|
108 |
+
.replace(/\u2581/g, " ")
|
109 |
+
.replace(/Ġ/g, " ")
|
110 |
+
.replace(/Ċ/g, "\n");
|
111 |
+
}
|
112 |
+
tooltipContent += `<div style="padding-left: 8px;">${predToken}: ${pred.log_prob.toFixed(4)}</div>`;
|
113 |
+
});
|
114 |
+
|
115 |
+
tooltipContent += `<div style="margin-top: 8px; border-top: 1px solid #555; padding-top: 8px;">
|
116 |
+
<div><strong>Stats:</strong></div>
|
117 |
+
<div style="padding-left: 8px;">Percentile: ${percentile.toFixed(2)}</div>
|
118 |
+
<div style="padding-left: 8px;">Log-Likelihood: ${logProb.toFixed(4)}</div>
|
119 |
+
</div>`;
|
120 |
+
|
121 |
+
tippy(tokenSpan, {
|
122 |
+
content: tooltipContent,
|
123 |
+
allowHTML: true,
|
124 |
+
theme: 'custom',
|
125 |
+
placement: 'top',
|
126 |
+
interactive: true
|
127 |
+
});
|
128 |
+
|
129 |
+
coloredTextDiv.appendChild(tokenSpan);
|
130 |
+
if (token === "<0x0A>") {
|
131 |
+
coloredTextDiv.appendChild(document.createElement("br"));
|
132 |
+
}
|
133 |
}
|
|
|
134 |
|
135 |
+
document.getElementById("joint-log-likelihood").textContent = data.joint_log_likelihood.toFixed(4);
|
136 |
+
document.getElementById("average-log-likelihood").textContent = data.average_log_likelihood.toFixed(4);
|
137 |
+
} catch (error) {
|
138 |
+
console.error("Error during analysis:", error);
|
139 |
+
alert("An error occurred during analysis. Please try again.");
|
140 |
+
} finally {
|
141 |
+
// Hide loading state
|
142 |
+
analyzeButton.classList.remove("loading");
|
143 |
+
buttonSpinner.classList.remove("visible");
|
144 |
+
analyzeButton.disabled = false;
|
145 |
+
}
|
146 |
});
|
147 |
|
148 |
function getColor(allLogProbs, currentLogProb) {
|
static/style.css
CHANGED
@@ -95,6 +95,45 @@ textarea {
|
|
95 |
font-weight: 500;
|
96 |
cursor: pointer;
|
97 |
transition: background-color 0.2s;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
}
|
99 |
|
100 |
.primary-button:hover {
|
|
|
95 |
font-weight: 500;
|
96 |
cursor: pointer;
|
97 |
transition: background-color 0.2s;
|
98 |
+
position: relative;
|
99 |
+
min-width: 100px;
|
100 |
+
display: flex;
|
101 |
+
align-items: center;
|
102 |
+
justify-content: center;
|
103 |
+
gap: 0.5rem;
|
104 |
+
}
|
105 |
+
|
106 |
+
.primary-button:disabled {
|
107 |
+
background-color: var(--text-secondary);
|
108 |
+
cursor: not-allowed;
|
109 |
+
}
|
110 |
+
|
111 |
+
.primary-button .button-text {
|
112 |
+
transition: opacity 0.2s;
|
113 |
+
}
|
114 |
+
|
115 |
+
.primary-button.loading .button-text {
|
116 |
+
opacity: 0;
|
117 |
+
}
|
118 |
+
|
119 |
+
.button-spinner {
|
120 |
+
position: absolute;
|
121 |
+
width: 20px;
|
122 |
+
height: 20px;
|
123 |
+
border: 2px solid rgba(255, 255, 255, 0.3);
|
124 |
+
border-top: 2px solid white;
|
125 |
+
border-radius: 50%;
|
126 |
+
animation: spin 1s linear infinite;
|
127 |
+
display: none;
|
128 |
+
}
|
129 |
+
|
130 |
+
.button-spinner.visible {
|
131 |
+
display: block;
|
132 |
+
}
|
133 |
+
|
134 |
+
@keyframes spin {
|
135 |
+
0% { transform: rotate(0deg); }
|
136 |
+
100% { transform: rotate(360deg); }
|
137 |
}
|
138 |
|
139 |
.primary-button:hover {
|
templates/index.html
CHANGED
@@ -30,7 +30,10 @@
|
|
30 |
<textarea id="input-text" placeholder="Enter your text here..."></textarea>
|
31 |
</div>
|
32 |
|
33 |
-
<button id="analyze-button" class="primary-button">
|
|
|
|
|
|
|
34 |
</div>
|
35 |
|
36 |
<div id="output" class="output-panel">
|
|
|
30 |
<textarea id="input-text" placeholder="Enter your text here..."></textarea>
|
31 |
</div>
|
32 |
|
33 |
+
<button id="analyze-button" class="primary-button">
|
34 |
+
<span class="button-text">Analyze</span>
|
35 |
+
<div class="button-spinner hidden"></div>
|
36 |
+
</button>
|
37 |
</div>
|
38 |
|
39 |
<div id="output" class="output-panel">
|