Spaces:
Runtime error
Runtime error
clean up
Browse files- .gitignore +2 -1
- README.md +2 -4
- app.py +27 -6
- main.py +0 -3
- requirements.txt +3 -2
- templates/index.html +103 -2
.gitignore
CHANGED
|
@@ -24,4 +24,5 @@ htmlcov/
|
|
| 24 |
.coverage.*
|
| 25 |
*,cover
|
| 26 |
venv
|
| 27 |
-
*_cache.json
|
|
|
|
|
|
| 24 |
.coverage.*
|
| 25 |
*,cover
|
| 26 |
venv
|
| 27 |
+
*_cache.json
|
| 28 |
+
flask_session/
|
README.md
CHANGED
|
@@ -1,11 +1,9 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
-
|
| 11 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Sentence Embeddings Visualization
|
| 3 |
+
emoji: 📈
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
---
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
from umap_reducer import UMAPReducer
|
| 2 |
from embeddings_encoder import EmbeddingsEncoder
|
| 3 |
-
from flask import Flask, request, render_template, jsonify, make_response
|
| 4 |
-
from
|
|
|
|
| 5 |
import os
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
import feedparser
|
|
@@ -10,14 +11,22 @@ from dateutil import parser
|
|
| 10 |
import re
|
| 11 |
import numpy as np
|
| 12 |
import gzip
|
|
|
|
| 13 |
|
| 14 |
load_dotenv()
|
| 15 |
|
| 16 |
|
| 17 |
app = Flask(__name__, static_url_path='/static')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
reducer = UMAPReducer()
|
| 19 |
encoder = EmbeddingsEncoder()
|
| 20 |
-
CORS(app)
|
| 21 |
|
| 22 |
|
| 23 |
@app.route('/')
|
|
@@ -26,15 +35,27 @@ def index():
|
|
| 26 |
|
| 27 |
|
| 28 |
@app.route('/run-umap', methods=['POST'])
|
|
|
|
| 29 |
def run_umap():
|
| 30 |
input_data = request.get_json()
|
| 31 |
sentences = input_data['data']['sentences']
|
| 32 |
umap_options = input_data['data']['umap_options']
|
| 33 |
cluster_options = input_data['data']['cluster_options']
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
print("input options:",
|
|
|
|
| 36 |
try:
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# UMAP embeddings
|
| 39 |
reducer.setParams(umap_options, cluster_options)
|
| 40 |
umap_embeddings = reducer.embed(embeddings)
|
|
@@ -51,7 +72,7 @@ def run_umap():
|
|
| 51 |
response.headers['Content-Encoding'] = 'gzip'
|
| 52 |
return response
|
| 53 |
except Exception as e:
|
| 54 |
-
return jsonify({"error": str(e)}),
|
| 55 |
|
| 56 |
|
| 57 |
if __name__ == '__main__':
|
|
|
|
| 1 |
from umap_reducer import UMAPReducer
|
| 2 |
from embeddings_encoder import EmbeddingsEncoder
|
| 3 |
+
from flask import Flask, request, render_template, jsonify, make_response, session
|
| 4 |
+
from flask_session import Session
|
| 5 |
+
from flask_cors import CORS, cross_origin
|
| 6 |
import os
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
import feedparser
|
|
|
|
| 11 |
import re
|
| 12 |
import numpy as np
|
| 13 |
import gzip
|
| 14 |
+
import hashlib
|
| 15 |
|
| 16 |
load_dotenv()
|
| 17 |
|
| 18 |
|
| 19 |
app = Flask(__name__, static_url_path='/static')
|
| 20 |
+
app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY")
|
| 21 |
+
app.config["SESSION_PERMANENT"] = True
|
| 22 |
+
app.config["SESSION_TYPE"] = "filesystem"
|
| 23 |
+
app.config["SESSION_COOKIE_SAMESITE"] = "None"
|
| 24 |
+
app.config["SESSION_COOKIE_SECURE"] = True
|
| 25 |
+
Session(app)
|
| 26 |
+
CORS(app)
|
| 27 |
+
|
| 28 |
reducer = UMAPReducer()
|
| 29 |
encoder = EmbeddingsEncoder()
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
@app.route('/')
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
@app.route('/run-umap', methods=['POST'])
|
| 38 |
+
@cross_origin(supports_credentials=True)
|
| 39 |
def run_umap():
|
| 40 |
input_data = request.get_json()
|
| 41 |
sentences = input_data['data']['sentences']
|
| 42 |
umap_options = input_data['data']['umap_options']
|
| 43 |
cluster_options = input_data['data']['cluster_options']
|
| 44 |
+
# create unique hash for input, avoid recalculating embeddings
|
| 45 |
+
sentences_input_hash = hashlib.sha256(
|
| 46 |
+
''.join(sentences).encode("utf-8")).hexdigest()
|
| 47 |
|
| 48 |
+
print("input options:", sentences_input_hash,
|
| 49 |
+
umap_options, cluster_options, "\n\n")
|
| 50 |
try:
|
| 51 |
+
if not session.get(sentences_input_hash):
|
| 52 |
+
print("New input, calculating embeddings" "\n\n")
|
| 53 |
+
embeddings = encoder.encode(sentences)
|
| 54 |
+
session[sentences_input_hash] = embeddings.tolist()
|
| 55 |
+
else:
|
| 56 |
+
print("Input already calculated, using cached embeddings", "\n\n")
|
| 57 |
+
embeddings = session[sentences_input_hash]
|
| 58 |
+
|
| 59 |
# UMAP embeddings
|
| 60 |
reducer.setParams(umap_options, cluster_options)
|
| 61 |
umap_embeddings = reducer.embed(embeddings)
|
|
|
|
| 72 |
response.headers['Content-Encoding'] = 'gzip'
|
| 73 |
return response
|
| 74 |
except Exception as e:
|
| 75 |
+
return jsonify({"error": str(e)}), 400
|
| 76 |
|
| 77 |
|
| 78 |
if __name__ == '__main__':
|
main.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
import subprocess
|
| 2 |
-
|
| 3 |
-
subprocess.run(["make", "build-all"], shell=False)
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
feedparser==6.0.8
|
| 2 |
Flask==2.0.3
|
| 3 |
flask_cors==3.0.10
|
|
|
|
| 4 |
hdbscan==0.8.28
|
| 5 |
numpy==1.22.2
|
| 6 |
python-dotenv==0.19.2
|
| 7 |
python_dateutil==2.8.2
|
|
|
|
| 8 |
transformers==4.16.2
|
| 9 |
-
umap-learn==0.5.2
|
| 10 |
-
torch
|
|
|
|
| 1 |
feedparser==6.0.8
|
| 2 |
Flask==2.0.3
|
| 3 |
flask_cors==3.0.10
|
| 4 |
+
flask_session==0.4.0
|
| 5 |
hdbscan==0.8.28
|
| 6 |
numpy==1.22.2
|
| 7 |
python-dotenv==0.19.2
|
| 8 |
python_dateutil==2.8.2
|
| 9 |
+
torch==1.10.2
|
| 10 |
transformers==4.16.2
|
| 11 |
+
umap-learn==0.5.2
|
|
|
templates/index.html
CHANGED
|
@@ -11,16 +11,117 @@
|
|
| 11 |
rel="stylesheet"
|
| 12 |
href="https://cdn.jsdelivr.net/npm/@observablehq/inspector@3/dist/inspector.css"
|
| 13 |
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
</head>
|
| 15 |
<body>
|
| 16 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
<script type="module">
|
| 18 |
import {
|
| 19 |
Runtime,
|
| 20 |
Inspector,
|
| 21 |
} from "https://cdn.jsdelivr.net/npm/@observablehq/runtime@4/dist/runtime.js";
|
| 22 |
import define from "https://api.observablehq.com/d/843a8bdf01fc2c8f.js?v=3";
|
| 23 |
-
new Runtime().module(define,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
</script>
|
| 25 |
</body>
|
| 26 |
</html>
|
|
|
|
| 11 |
rel="stylesheet"
|
| 12 |
href="https://cdn.jsdelivr.net/npm/@observablehq/inspector@3/dist/inspector.css"
|
| 13 |
/>
|
| 14 |
+
<style>
|
| 15 |
+
@import url("https://fonts.googleapis.com/css2?family=Roboto&family=Source+Serif+4:wght@700&display=swap");
|
| 16 |
+
</style>
|
| 17 |
+
<style>
|
| 18 |
+
.mw8 {
|
| 19 |
+
max-width: 64rem;
|
| 20 |
+
}
|
| 21 |
+
.mx-auto {
|
| 22 |
+
margin-left: auto;
|
| 23 |
+
margin-right: auto;
|
| 24 |
+
}
|
| 25 |
+
.ph3 {
|
| 26 |
+
padding-left: 1rem;
|
| 27 |
+
padding-right: 1rem;
|
| 28 |
+
}
|
| 29 |
+
.measure-wide {
|
| 30 |
+
max-width: 34em;
|
| 31 |
+
}
|
| 32 |
+
* {
|
| 33 |
+
font-family: "Roboto", sans-serif;
|
| 34 |
+
}
|
| 35 |
+
h1,
|
| 36 |
+
h2,
|
| 37 |
+
h3,
|
| 38 |
+
h4,
|
| 39 |
+
h5,
|
| 40 |
+
h6 {
|
| 41 |
+
font-family: "Source Serif 4", serif;
|
| 42 |
+
}
|
| 43 |
+
</style>
|
| 44 |
</head>
|
| 45 |
<body>
|
| 46 |
+
<article class="mw8 mx-auto ph3 sans-serif">
|
| 47 |
+
<div class="measure-wide" id="observablehq-intro-7dbb745c"></div>
|
| 48 |
+
<div id="observablehq-viewof-sentences-7dbb745c"></div>
|
| 49 |
+
<div id="observablehq-viewof-params-7dbb745c"></div>
|
| 50 |
+
<div id="observablehq-viewof-tryme-7dbb745c"></div>
|
| 51 |
+
<div id="observablehq-viewof-clear-7dbb745c"></div>
|
| 52 |
+
<div id="observablehq-dialog-7dbb745c"></div>
|
| 53 |
+
<div id="observablehq-viewof-scatter-7dbb745c"></div>
|
| 54 |
+
<div id="observablehq-umapoptions-7dbb745c"></div>
|
| 55 |
+
<div id="observablehq-viewof-umapOptions-7dbb745c"></div>
|
| 56 |
+
<div id="observablehq-umapbutton-7dbb745c"></div>
|
| 57 |
+
<div id="observablehq-hdbscanoptions-7dbb745c"></div>
|
| 58 |
+
<div id="observablehq-viewof-clusterOptions-7dbb745c"></div>
|
| 59 |
+
<div id="observablehq-hdbcscanbutton-7dbb745c"></div>
|
| 60 |
+
</article>
|
| 61 |
+
|
| 62 |
<script type="module">
|
| 63 |
import {
|
| 64 |
Runtime,
|
| 65 |
Inspector,
|
| 66 |
} from "https://cdn.jsdelivr.net/npm/@observablehq/runtime@4/dist/runtime.js";
|
| 67 |
import define from "https://api.observablehq.com/d/843a8bdf01fc2c8f.js?v=3";
|
| 68 |
+
new Runtime().module(define, (name) => {
|
| 69 |
+
if (name === "intro")
|
| 70 |
+
return new Inspector(
|
| 71 |
+
document.querySelector("#observablehq-intro-7dbb745c")
|
| 72 |
+
);
|
| 73 |
+
if (name === "viewof sentences")
|
| 74 |
+
return new Inspector(
|
| 75 |
+
document.querySelector("#observablehq-viewof-sentences-7dbb745c")
|
| 76 |
+
);
|
| 77 |
+
if (name === "viewof params")
|
| 78 |
+
return new Inspector(
|
| 79 |
+
document.querySelector("#observablehq-viewof-params-7dbb745c")
|
| 80 |
+
);
|
| 81 |
+
if (name === "viewof tryme")
|
| 82 |
+
return new Inspector(
|
| 83 |
+
document.querySelector("#observablehq-viewof-tryme-7dbb745c")
|
| 84 |
+
);
|
| 85 |
+
if (name === "viewof clear")
|
| 86 |
+
return new Inspector(
|
| 87 |
+
document.querySelector("#observablehq-viewof-clear-7dbb745c")
|
| 88 |
+
);
|
| 89 |
+
if (name === "dialog")
|
| 90 |
+
return new Inspector(
|
| 91 |
+
document.querySelector("#observablehq-dialog-7dbb745c")
|
| 92 |
+
);
|
| 93 |
+
if (name === "viewof scatter")
|
| 94 |
+
return new Inspector(
|
| 95 |
+
document.querySelector("#observablehq-viewof-scatter-7dbb745c")
|
| 96 |
+
);
|
| 97 |
+
if (name === "umapoptions")
|
| 98 |
+
return new Inspector(
|
| 99 |
+
document.querySelector("#observablehq-umapoptions-7dbb745c")
|
| 100 |
+
);
|
| 101 |
+
if (name === "viewof umapOptions")
|
| 102 |
+
return new Inspector(
|
| 103 |
+
document.querySelector("#observablehq-viewof-umapOptions-7dbb745c")
|
| 104 |
+
);
|
| 105 |
+
if (name === "umapbutton")
|
| 106 |
+
return new Inspector(
|
| 107 |
+
document.querySelector("#observablehq-umapbutton-7dbb745c")
|
| 108 |
+
);
|
| 109 |
+
if (name === "hdbscanoptions")
|
| 110 |
+
return new Inspector(
|
| 111 |
+
document.querySelector("#observablehq-hdbscanoptions-7dbb745c")
|
| 112 |
+
);
|
| 113 |
+
if (name === "viewof clusterOptions")
|
| 114 |
+
return new Inspector(
|
| 115 |
+
document.querySelector(
|
| 116 |
+
"#observablehq-viewof-clusterOptions-7dbb745c"
|
| 117 |
+
)
|
| 118 |
+
);
|
| 119 |
+
if (name === "hdbcscanbutton")
|
| 120 |
+
return new Inspector(
|
| 121 |
+
document.querySelector("#observablehq-hdbcscanbutton-7dbb745c")
|
| 122 |
+
);
|
| 123 |
+
return ["update", "data", "colorScale"].includes(name);
|
| 124 |
+
});
|
| 125 |
</script>
|
| 126 |
</body>
|
| 127 |
</html>
|