PhilippSpohn committed on
Commit
3bd5de9
·
1 Parent(s): b9b96cc

Initial commit for HF Spaces deployment

Browse files
Files changed (6) hide show
  1. Dockerfile +19 -0
  2. README.md +17 -23
  3. app.py +1 -1
  4. static/script.js +131 -114
  5. static/style.css +39 -0
  6. templates/index.html +4 -1
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9-slim
2
+
3
+ # Create a non-root user
4
+ RUN useradd -m -u 1000 user
5
+ USER user
6
+ ENV PATH="/home/user/.local/bin:$PATH"
7
+
8
+ WORKDIR /app
9
+
10
+ COPY --chown=user ./requirements.txt requirements.txt
11
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
+
13
+ COPY --chown=user . /app
14
+
15
+ # Make port 7860 available (required for HF Spaces)
16
+ EXPOSE 7860
17
+
18
+ # Start the app
19
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,35 +1,29 @@
1
  # Token Probability Analyzer
2
 
3
- A web application that analyzes token probabilities using various language models. This tool helps visualize and understand how language models predict tokens in a given text sequence.
4
 
5
  ## Features
6
 
7
- - Support for multiple language models (GPT-2, TinyLlama, etc.)
8
- - Token-by-token probability analysis
9
- - Percentile scoring for token probabilities
10
- - Top-k predictions for each position
11
- - Joint and average log likelihood calculations
12
-
13
- ## Setup
14
-
15
- 1. Install the required dependencies:
16
- ```bash
17
- pip install -r requirements.txt
18
- ```
19
-
20
- 2. Run the application:
21
- ```bash
22
- python app.py
23
- ```
24
-
25
- 3. Open your browser and navigate to `http://localhost:5000`
26
 
27
  ## Usage
28
 
29
- 1. Select a language model from the dropdown menu
30
  2. Enter your text in the input field
31
- 3. Click "Analyze" to see the token probabilities and predictions
32
 
33
  ## Technical Details
34
 
35
- The application uses Flask for the backend and provides a simple web interface. It leverages the Hugging Face Transformers library to load and run various language models for token probability analysis.
 
 
 
 
 
 
 
 
 
1
  # Token Probability Analyzer
2
 
3
+ This web application allows you to analyze token probabilities and predictions from various language models. It provides insights into how likely each token is according to the model, along with top predictions at each position.
4
 
5
  ## Features
6
 
7
+ - Analyze text using different language models (GPT-2, TinyLlama, etc.)
8
+ - View token-by-token probabilities
9
+ - See percentile scores for each token
10
+ - Explore top-k predictions at each position
11
+ - Calculate joint and average log-likelihood
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  ## Usage
14
 
15
+ 1. Select a model from the dropdown menu
16
  2. Enter your text in the input field
17
+ 3. Click "Analyze" to see the results
18
 
19
  ## Technical Details
20
 
21
+ Built with:
22
+ - Flask
23
+ - Hugging Face Transformers
24
+ - PyTorch
25
+ - JavaScript for interactive visualizations
26
+
27
+ ## Deployment
28
+
29
+ This app is deployed on Hugging Face Spaces using Docker.
app.py CHANGED
@@ -95,4 +95,4 @@ def analyze():
95
  })
96
 
97
  if __name__ == "__main__":
98
- app.run(debug=True)
 
95
  })
96
 
97
  if __name__ == "__main__":
98
+ app.run(host="0.0.0.0", port=7860)
static/script.js CHANGED
@@ -1,131 +1,148 @@
1
  document.getElementById("analyze-button").addEventListener("click", async () => {
2
  const text = document.getElementById("input-text").value;
3
  const model = document.getElementById("model-select").value;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- const response = await fetch("/analyze", {
6
- method: "POST",
7
- headers: {
8
- "Content-Type": "application/json"
9
- },
10
- body: JSON.stringify({ text, model })
11
- });
12
-
13
- const data = await response.json();
14
 
15
- const coloredTextDiv = document.getElementById("colored-text");
16
- coloredTextDiv.innerHTML = "";
17
 
18
- // Always add the first token
19
- const firstToken = data.tokens[0];
20
- const firstTokenSpan = document.createElement("span");
21
- firstTokenSpan.classList.add("token");
22
-
23
- // Handle special tokens and regular tokens differently
24
- if (firstToken === "<s>" || firstToken === "<|endoftext|>") {
25
- firstTokenSpan.style.backgroundColor = "#808080"; // Gray for special tokens
26
- firstTokenSpan.textContent = "■";
27
- tippy(firstTokenSpan, {
28
- content: "<div><strong>Beginning of Sequence</strong></div>",
29
- allowHTML: true,
30
- theme: 'custom',
31
- placement: 'top',
32
- interactive: true
33
- });
34
- } else {
35
- // Handle regular first token
36
- firstTokenSpan.style.backgroundColor = "#808080"; // or any other color you prefer
37
- firstTokenSpan.textContent = firstToken;
38
- tippy(firstTokenSpan, {
39
- content: `<div><strong>First Token</strong></div>`,
40
- allowHTML: true,
41
- theme: 'custom',
42
- placement: 'top',
43
- interactive: true
44
- });
45
- }
46
-
47
- coloredTextDiv.appendChild(firstTokenSpan);
48
-
49
- for (let index = 0; index < data.log_probs.length; index++) {
50
- const token = data.tokens[index + 1];
51
- const percentile = data.percentiles[index];
52
- const logProb = data.log_probs[index];
53
- const topKPredictions = data.top_k_predictions[index];
54
- const color = getColor(data.log_probs, logProb);
55
-
56
- const tokenSpan = document.createElement("span");
57
- tokenSpan.classList.add("token");
58
- tokenSpan.style.backgroundColor = color;
59
-
60
- let displayToken = token;
61
- let specialTokenDescription = "";
62
-
63
- // Enhanced special token handling
64
- if (token === "<s>" || token === "<|endoftext|>") {
65
- displayToken = "■";
66
- specialTokenDescription = "Beginning of Sequence";
67
- } else if (token === "</s>" || token === "<|endoftext|>") {
68
- displayToken = "■";
69
- specialTokenDescription = "End of Sequence";
70
- } else if (token === "<0x0A>") {
71
- displayToken = "■";
72
- specialTokenDescription = "Newline";
73
- } else if (token.startsWith("<") && token.endsWith(">")) {
74
- displayToken = "■";
75
- specialTokenDescription = "Special Token: " + token;
76
  } else {
77
- // Clean up GPT-2 style tokens (Ġ and Ċ)
78
- displayToken = displayToken
79
- .replace(/\u2581/g, " ") // Replace underscore token
80
- .replace(/Ġ/g, " ") // Replace GPT-2 space token
81
- .replace(/Ċ/g, "\n"); // Replace GPT-2 newline token
 
 
 
 
 
82
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- tokenSpan.textContent = displayToken;
85
-
86
- let tooltipContent = "";
87
- if (specialTokenDescription) {
88
- tooltipContent += `<div style="font-weight: bold; margin-bottom: 8px;">${specialTokenDescription}</div>`;
89
- }
90
 
91
- tooltipContent += `<div style="font-weight: bold; margin-bottom: 4px;">Top 5 Predictions:</div>`;
92
- topKPredictions.forEach(pred => {
93
- let predToken = pred.token;
94
- if (predToken === "<0x0A>") {
95
- predToken = "\\n";
96
- } else if (predToken.startsWith("<") && predToken.endsWith(">")) {
97
- predToken = "[SPECIAL]";
98
- } else {
99
- predToken = predToken
100
- .replace(/\u2581/g, " ")
101
- .replace(/Ġ/g, " ")
102
- .replace(/Ċ/g, "\n");
103
  }
104
- tooltipContent += `<div style="padding-left: 8px;">${predToken}: ${pred.log_prob.toFixed(4)}</div>`;
105
- });
106
-
107
- tooltipContent += `<div style="margin-top: 8px; border-top: 1px solid #555; padding-top: 8px;">
108
- <div><strong>Stats:</strong></div>
109
- <div style="padding-left: 8px;">Percentile: ${percentile.toFixed(2)}</div>
110
- <div style="padding-left: 8px;">Log-Likelihood: ${logProb.toFixed(4)}</div>
111
- </div>`;
112
-
113
- tippy(tokenSpan, {
114
- content: tooltipContent,
115
- allowHTML: true,
116
- theme: 'custom',
117
- placement: 'top',
118
- interactive: true
119
- });
120
 
121
- coloredTextDiv.appendChild(tokenSpan);
122
- if (token === "<0x0A>") {
123
- coloredTextDiv.appendChild(document.createElement("br"));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
- }
126
 
127
- document.getElementById("joint-log-likelihood").textContent = data.joint_log_likelihood.toFixed(4);
128
- document.getElementById("average-log-likelihood").textContent = data.average_log_likelihood.toFixed(4);
 
 
 
 
 
 
 
 
 
129
  });
130
 
131
  function getColor(allLogProbs, currentLogProb) {
 
1
  document.getElementById("analyze-button").addEventListener("click", async () => {
2
  const text = document.getElementById("input-text").value;
3
  const model = document.getElementById("model-select").value;
4
+
5
+ // Show loading state
6
+ const analyzeButton = document.getElementById("analyze-button");
7
+ const buttonSpinner = analyzeButton.querySelector(".button-spinner");
8
+ analyzeButton.classList.add("loading");
9
+ buttonSpinner.classList.add("visible");
10
+ analyzeButton.disabled = true;
11
+
12
+ try {
13
+ const response = await fetch("/analyze", {
14
+ method: "POST",
15
+ headers: {
16
+ "Content-Type": "application/json"
17
+ },
18
+ body: JSON.stringify({ text, model })
19
+ });
20
 
21
+ const data = await response.json();
 
 
 
 
 
 
 
 
22
 
23
+ const coloredTextDiv = document.getElementById("colored-text");
24
+ coloredTextDiv.innerHTML = "";
25
 
26
+ // Always add the first token
27
+ const firstToken = data.tokens[0];
28
+ const firstTokenSpan = document.createElement("span");
29
+ firstTokenSpan.classList.add("token");
30
+
31
+ // Handle special tokens and regular tokens differently
32
+ if (firstToken === "<s>" || firstToken === "<|endoftext|>") {
33
+ firstTokenSpan.style.backgroundColor = "#808080"; // Gray for special tokens
34
+ firstTokenSpan.textContent = "■";
35
+ tippy(firstTokenSpan, {
36
+ content: "<div><strong>Beginning of Sequence</strong></div>",
37
+ allowHTML: true,
38
+ theme: 'custom',
39
+ placement: 'top',
40
+ interactive: true
41
+ });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  } else {
43
+ // Handle regular first token
44
+ firstTokenSpan.style.backgroundColor = "#808080"; // or any other color you prefer
45
+ firstTokenSpan.textContent = firstToken;
46
+ tippy(firstTokenSpan, {
47
+ content: `<div><strong>First Token</strong></div>`,
48
+ allowHTML: true,
49
+ theme: 'custom',
50
+ placement: 'top',
51
+ interactive: true
52
+ });
53
  }
54
+
55
+ coloredTextDiv.appendChild(firstTokenSpan);
56
+
57
+ for (let index = 0; index < data.log_probs.length; index++) {
58
+ const token = data.tokens[index + 1];
59
+ const percentile = data.percentiles[index];
60
+ const logProb = data.log_probs[index];
61
+ const topKPredictions = data.top_k_predictions[index];
62
+ const color = getColor(data.log_probs, logProb);
63
+
64
+ const tokenSpan = document.createElement("span");
65
+ tokenSpan.classList.add("token");
66
+ tokenSpan.style.backgroundColor = color;
67
+
68
+ let displayToken = token;
69
+ let specialTokenDescription = "";
70
+
71
+ // Enhanced special token handling
72
+ if (token === "<s>" || token === "<|endoftext|>") {
73
+ displayToken = "■";
74
+ specialTokenDescription = "Beginning of Sequence";
75
+ } else if (token === "</s>" || token === "<|endoftext|>") {
76
+ displayToken = "■";
77
+ specialTokenDescription = "End of Sequence";
78
+ } else if (token === "<0x0A>") {
79
+ displayToken = "■";
80
+ specialTokenDescription = "Newline";
81
+ } else if (token.startsWith("<") && token.endsWith(">")) {
82
+ displayToken = "■";
83
+ specialTokenDescription = "Special Token: " + token;
84
+ } else {
85
+ // Clean up GPT-2 style tokens (Ġ and Ċ)
86
+ displayToken = displayToken
87
+ .replace(/\u2581/g, " ") // Replace underscore token
88
+ .replace(/Ġ/g, " ") // Replace GPT-2 space token
89
+ .replace(/Ċ/g, "\n"); // Replace GPT-2 newline token
90
+ }
91
 
92
+ tokenSpan.textContent = displayToken;
 
 
 
 
 
93
 
94
+ let tooltipContent = "";
95
+ if (specialTokenDescription) {
96
+ tooltipContent += `<div style="font-weight: bold; margin-bottom: 8px;">${specialTokenDescription}</div>`;
 
 
 
 
 
 
 
 
 
97
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ tooltipContent += `<div style="font-weight: bold; margin-bottom: 4px;">Top 5 Predictions:</div>`;
100
+ topKPredictions.forEach(pred => {
101
+ let predToken = pred.token;
102
+ if (predToken === "<0x0A>") {
103
+ predToken = "\\n";
104
+ } else if (predToken.startsWith("<") && predToken.endsWith(">")) {
105
+ predToken = "[SPECIAL]";
106
+ } else {
107
+ predToken = predToken
108
+ .replace(/\u2581/g, " ")
109
+ .replace(/Ġ/g, " ")
110
+ .replace(/Ċ/g, "\n");
111
+ }
112
+ tooltipContent += `<div style="padding-left: 8px;">${predToken}: ${pred.log_prob.toFixed(4)}</div>`;
113
+ });
114
+
115
+ tooltipContent += `<div style="margin-top: 8px; border-top: 1px solid #555; padding-top: 8px;">
116
+ <div><strong>Stats:</strong></div>
117
+ <div style="padding-left: 8px;">Percentile: ${percentile.toFixed(2)}</div>
118
+ <div style="padding-left: 8px;">Log-Likelihood: ${logProb.toFixed(4)}</div>
119
+ </div>`;
120
+
121
+ tippy(tokenSpan, {
122
+ content: tooltipContent,
123
+ allowHTML: true,
124
+ theme: 'custom',
125
+ placement: 'top',
126
+ interactive: true
127
+ });
128
+
129
+ coloredTextDiv.appendChild(tokenSpan);
130
+ if (token === "<0x0A>") {
131
+ coloredTextDiv.appendChild(document.createElement("br"));
132
+ }
133
  }
 
134
 
135
+ document.getElementById("joint-log-likelihood").textContent = data.joint_log_likelihood.toFixed(4);
136
+ document.getElementById("average-log-likelihood").textContent = data.average_log_likelihood.toFixed(4);
137
+ } catch (error) {
138
+ console.error("Error during analysis:", error);
139
+ alert("An error occurred during analysis. Please try again.");
140
+ } finally {
141
+ // Hide loading state
142
+ analyzeButton.classList.remove("loading");
143
+ buttonSpinner.classList.remove("visible");
144
+ analyzeButton.disabled = false;
145
+ }
146
  });
147
 
148
  function getColor(allLogProbs, currentLogProb) {
static/style.css CHANGED
@@ -95,6 +95,45 @@ textarea {
95
  font-weight: 500;
96
  cursor: pointer;
97
  transition: background-color 0.2s;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  }
99
 
100
  .primary-button:hover {
 
95
  font-weight: 500;
96
  cursor: pointer;
97
  transition: background-color 0.2s;
98
+ position: relative;
99
+ min-width: 100px;
100
+ display: flex;
101
+ align-items: center;
102
+ justify-content: center;
103
+ gap: 0.5rem;
104
+ }
105
+
106
+ .primary-button:disabled {
107
+ background-color: var(--text-secondary);
108
+ cursor: not-allowed;
109
+ }
110
+
111
+ .primary-button .button-text {
112
+ transition: opacity 0.2s;
113
+ }
114
+
115
+ .primary-button.loading .button-text {
116
+ opacity: 0;
117
+ }
118
+
119
+ .button-spinner {
120
+ position: absolute;
121
+ width: 20px;
122
+ height: 20px;
123
+ border: 2px solid rgba(255, 255, 255, 0.3);
124
+ border-top: 2px solid white;
125
+ border-radius: 50%;
126
+ animation: spin 1s linear infinite;
127
+ display: none;
128
+ }
129
+
130
+ .button-spinner.visible {
131
+ display: block;
132
+ }
133
+
134
+ @keyframes spin {
135
+ 0% { transform: rotate(0deg); }
136
+ 100% { transform: rotate(360deg); }
137
  }
138
 
139
  .primary-button:hover {
templates/index.html CHANGED
@@ -30,7 +30,10 @@
30
  <textarea id="input-text" placeholder="Enter your text here..."></textarea>
31
  </div>
32
 
33
- <button id="analyze-button" class="primary-button">Analyze</button>
 
 
 
34
  </div>
35
 
36
  <div id="output" class="output-panel">
 
30
  <textarea id="input-text" placeholder="Enter your text here..."></textarea>
31
  </div>
32
 
33
+ <button id="analyze-button" class="primary-button">
34
+ <span class="button-text">Analyze</span>
35
+ <div class="button-spinner hidden"></div>
36
+ </button>
37
  </div>
38
 
39
  <div id="output" class="output-panel">