warhawkmonk committed on
Commit
5575c40
·
verified ·
1 Parent(s): 61987b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -73
app.py CHANGED
@@ -413,51 +413,51 @@ else:
413
 
414
 
415
 
416
-
417
- with column1:
418
- # Create a canvas component
419
- changes,implementation,current=st.columns([0.01,0.9,0.01])
420
-
421
- with implementation:
422
- st.write("<br>"*3,unsafe_allow_html=True)
423
- if bg_doc:
424
-
425
- canvas_result=None
426
-
427
- with open("temp.pdf", "wb") as f:
428
- f.write(bg_doc.getbuffer())
429
-
430
- # Process the uploaded PDF file
431
- data = process_pdf("temp.pdf")
432
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
433
- chunks = text_splitter.split_documents(data)
434
- # chunk_texts = [str(chunk.page_content) for chunk in chunks]
435
- # print("testing",chunk_texts)
436
- model_name = "all-MiniLM-L6-v2"
437
- model = SentenceTransformer(model_name)
438
- embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
439
-
440
- vector_store = []
441
- for chunk, embedding in zip(chunks, embeddings):
442
- vector_store.append((embedding, chunk.page_content) )
443
 
444
- else:
445
-
446
-
447
-
448
- canvas_result = st_canvas(
449
- fill_color="rgba(0, 0, 0, 0.3)", # Fixed fill color with some opacity
450
- stroke_width=stroke_width,
451
- stroke_color=stroke_color,
452
- background_color=bg_color,
453
- background_image=gen_image if gen_image else Image.open("/home/user/app/ALL_image_formation/image_gen.png"),
454
- update_streamlit=True,
455
- height=int(screen_height//2.16) if screen_height!=1180 else screen_height//2,
456
- width=int(screen_width//2.3) if screen_width!=820 else screen_width//2,
457
- drawing_mode=drawing_mode,
458
- point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
459
- key="canvas",
460
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
 
462
 
463
 
@@ -471,35 +471,36 @@ with column1:
471
 
472
  # run=st.button("run_experiment")
473
  if bg_doc:
474
- if len(dictionary['every_prompt_with_val'])==0:
475
- query_embedding = model.encode(["something"])
476
- else:
477
-
478
- query_embedding = model.encode([dictionary['every_prompt_with_val'][-1][0]])
479
- retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for match in vector_store])[-1]
480
-
481
-
482
-
483
- with implementation:
484
- with st.spinner('Wait for it...'):
485
- text_lookup=retrieved_chunks
486
- pages=[]
487
- buffer = bg_doc.getbuffer()
488
- byte_data = bytes(buffer)
489
- with fitz.open("temp.pdf") as doc:
490
-
491
- for page_no in range(doc.page_count):
492
- pages.append(doc.load_page(page_no - 1))
493
-
494
- # areas = pages[page_number-1].search_for(text_lookup)
495
- with st.container(height=int(screen_height//1.8)):
496
- for pg_no in pages[::-1]:
497
- areas = pg_no.search_for(text_lookup)
498
- for area in areas:
499
- pg_no.add_rect_annot(area)
500
-
501
- pix = pg_no.get_pixmap(dpi=100).tobytes()
502
- st.image(pix,use_container_width=True)
 
503
 
504
  if bg_doc and prompt:
505
  query_embedding = model.encode([prompt])
 
413
 
414
 
415
 
416
+ with st.spinner('Wait for it...'):
417
+ with column1:
418
+ # Create a canvas component
419
+ changes,implementation,current=st.columns([0.01,0.9,0.01])
420
+
421
+ with implementation:
422
+ st.write("<br>"*3,unsafe_allow_html=True)
423
+ if bg_doc:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
+ canvas_result=None
426
+
427
+ with open("temp.pdf", "wb") as f:
428
+ f.write(bg_doc.getbuffer())
429
+
430
+ # Process the uploaded PDF file
431
+ data = process_pdf("temp.pdf")
432
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
433
+ chunks = text_splitter.split_documents(data)
434
+ # chunk_texts = [str(chunk.page_content) for chunk in chunks]
435
+ # print("testing",chunk_texts)
436
+ model_name = "all-MiniLM-L6-v2"
437
+ model = SentenceTransformer(model_name)
438
+ embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
439
+
440
+ vector_store = []
441
+ for chunk, embedding in zip(chunks, embeddings):
442
+ vector_store.append((embedding, chunk.page_content) )
443
+
444
+ else:
445
+
446
+
447
+
448
+ canvas_result = st_canvas(
449
+ fill_color="rgba(0, 0, 0, 0.3)", # Fixed fill color with some opacity
450
+ stroke_width=stroke_width,
451
+ stroke_color=stroke_color,
452
+ background_color=bg_color,
453
+ background_image=gen_image if gen_image else Image.open("/home/user/app/ALL_image_formation/image_gen.png"),
454
+ update_streamlit=True,
455
+ height=int(screen_height//2.16) if screen_height!=1180 else screen_height//2,
456
+ width=int(screen_width//2.3) if screen_width!=820 else screen_width//2,
457
+ drawing_mode=drawing_mode,
458
+ point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
459
+ key="canvas",
460
+ )
461
 
462
 
463
 
 
471
 
472
  # run=st.button("run_experiment")
473
  if bg_doc:
474
+ with st.spinner('Wait for it...'):
475
+ if len(dictionary['every_prompt_with_val'])==0:
476
+ query_embedding = model.encode(["something"])
477
+ else:
478
+
479
+ query_embedding = model.encode([dictionary['every_prompt_with_val'][-1][0]])
480
+ retrieved_chunks = max([(util.cos_sim(match[0],query_embedding),match[-1])for match in vector_store])[-1]
481
+
482
+
483
+
484
+ with implementation:
485
+ with st.spinner('Wait for it...'):
486
+ text_lookup=retrieved_chunks
487
+ pages=[]
488
+ buffer = bg_doc.getbuffer()
489
+ byte_data = bytes(buffer)
490
+ with fitz.open("temp.pdf") as doc:
491
+
492
+ for page_no in range(doc.page_count):
493
+ pages.append(doc.load_page(page_no - 1))
494
+
495
+ # areas = pages[page_number-1].search_for(text_lookup)
496
+ with st.container(height=int(screen_height//1.8)):
497
+ for pg_no in pages[::-1]:
498
+ areas = pg_no.search_for(text_lookup)
499
+ for area in areas:
500
+ pg_no.add_rect_annot(area)
501
+
502
+ pix = pg_no.get_pixmap(dpi=100).tobytes()
503
+ st.image(pix,use_container_width=True)
504
 
505
  if bg_doc and prompt:
506
  query_embedding = model.encode([prompt])