warhawkmonk committed on
Commit
e6d537d
·
verified ·
1 Parent(s): a773fcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -9
app.py CHANGED
@@ -228,6 +228,8 @@ if "current_session" not in dictionary:
228
  dictionary['current_session']=None
229
  if "image_movement" not in dictionary:
230
  dictionary['image_movement']=None
 
 
231
 
232
  stroke_width = st.sidebar.slider("Stroke width: ", 1, 25, 20)
233
  if drawing_mode == 'point':
@@ -531,17 +533,31 @@ with st.spinner('Wait for it...'):
531
 
532
  # Process the uploaded PDF file
533
  data = process_pdf("temp.pdf")
534
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
535
- chunks = text_splitter.split_documents(data)
536
- # chunk_texts = [str(chunk.page_content) for chunk in chunks]
537
- # print("testing",chunk_texts)
538
 
539
- model = encoding_model()
540
- embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
 
542
- vector_store = []
543
- for chunk, embedding in zip(chunks, embeddings):
544
- vector_store.append((embedding, chunk.page_content) )
545
 
546
  else:
547
  if screen_width<=485:
 
228
  dictionary['current_session']=None
229
  if "image_movement" not in dictionary:
230
  dictionary['image_movement']=None
231
+ if "text_embeddings" not in dictionary:
232
+ dictionary['text_embeddings']={}
233
 
234
  stroke_width = st.sidebar.slider("Stroke width: ", 1, 25, 20)
235
  if drawing_mode == 'point':
 
533
 
534
  # Process the uploaded PDF file
535
  data = process_pdf("temp.pdf")
536
+ if str(data) not in dictionary['text_embeddings']:
537
+ dictionary['text_embeddings']={}
538
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=100)
539
+ chunks = text_splitter.split_documents(data)
540
 
541
+ dictionary['text_embeddings'][str(data)]={str(chunk.page_content):model.encode(str(chunk.page_content)) for chunk in chunks}
542
+
543
+ embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
544
+ st.rerun()
545
+ else:
546
+ embeddings = [dictionary['text_embeddings'][str(data)][i] for i in dictionary['text_embeddings'][str(data)]]
547
+ vector_store = []
548
+ for i in dictionary['text_embeddings'][str(data)]:
549
+ vector_store.append((dictionary['text_embeddings'][str(data)][i],i))
550
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
551
+ # chunks = text_splitter.split_documents(data)
552
+ # # chunk_texts = [str(chunk.page_content) for chunk in chunks]
553
+ # # print("testing",chunk_texts)
554
+
555
+ # model = encoding_model()
556
+ # embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
557
 
558
+ # vector_store = []
559
+ # for chunk, embedding in zip(chunks, embeddings):
560
+ # vector_store.append((embedding, chunk.page_content) )
561
 
562
  else:
563
  if screen_width<=485: