Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -228,6 +228,8 @@ if "current_session" not in dictionary:
|
|
228 |
dictionary['current_session']=None
|
229 |
if "image_movement" not in dictionary:
|
230 |
dictionary['image_movement']=None
|
|
|
|
|
231 |
|
232 |
stroke_width = st.sidebar.slider("Stroke width: ", 1, 25, 20)
|
233 |
if drawing_mode == 'point':
|
@@ -531,17 +533,31 @@ with st.spinner('Wait for it...'):
|
|
531 |
|
532 |
# Process the uploaded PDF file
|
533 |
data = process_pdf("temp.pdf")
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
|
539 |
-
|
540 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
541 |
|
542 |
-
vector_store = []
|
543 |
-
for chunk, embedding in zip(chunks, embeddings):
|
544 |
-
|
545 |
|
546 |
else:
|
547 |
if screen_width<=485:
|
|
|
228 |
dictionary['current_session']=None
|
229 |
if "image_movement" not in dictionary:
|
230 |
dictionary['image_movement']=None
|
231 |
+
if "text_embeddings" not in dictionary:
|
232 |
+
dictionary['text_embeddings']={}
|
233 |
|
234 |
stroke_width = st.sidebar.slider("Stroke width: ", 1, 25, 20)
|
235 |
if drawing_mode == 'point':
|
|
|
533 |
|
534 |
# Process the uploaded PDF file
|
535 |
data = process_pdf("temp.pdf")
|
536 |
+
if str(data) not in dictionary['text_embeddings']:
|
537 |
+
dictionary['text_embeddings']={}
|
538 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=100)
|
539 |
+
chunks = text_splitter.split_documents(data)
|
540 |
|
541 |
+
dictionary['text_embeddings'][str(data)]={str(chunk.page_content):model.encode(str(chunk.page_content)) for chunk in chunks}
|
542 |
+
|
543 |
+
embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
|
544 |
+
st.rerun()
|
545 |
+
else:
|
546 |
+
embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
|
547 |
+
vector_store = []
|
548 |
+
for i in dictionary['text_embeddings'][str(data)]:
|
549 |
+
vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
|
550 |
+
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
551 |
+
# chunks = text_splitter.split_documents(data)
|
552 |
+
# # chunk_texts = [str(chunk.page_content) for chunk in chunks]
|
553 |
+
# # print("testing",chunk_texts)
|
554 |
+
|
555 |
+
# model = encoding_model()
|
556 |
+
# embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
|
557 |
|
558 |
+
# vector_store = []
|
559 |
+
# for chunk, embedding in zip(chunks, embeddings):
|
560 |
+
# vector_store.append((embedding, chunk.page_content) )
|
561 |
|
562 |
else:
|
563 |
if screen_width<=485:
|