Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -413,51 +413,51 @@ else:
|
|
413 |
|
414 |
|
415 |
|
416 |
-
|
417 |
-
with column1:
|
418 |
-
# Create a canvas component
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
canvas_result=None
|
426 |
-
|
427 |
-
with open("temp.pdf", "wb") as f:
|
428 |
-
f.write(bg_doc.getbuffer())
|
429 |
-
|
430 |
-
# Process the uploaded PDF file
|
431 |
-
data = process_pdf("temp.pdf")
|
432 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
433 |
-
chunks = text_splitter.split_documents(data)
|
434 |
-
# chunk_texts = [str(chunk.page_content) for chunk in chunks]
|
435 |
-
# print("testing",chunk_texts)
|
436 |
-
model_name = "all-MiniLM-L6-v2"
|
437 |
-
model = SentenceTransformer(model_name)
|
438 |
-
embeddings = [model.encode(str(chunk.page_content)) for chunk in chunks]
|
439 |
-
|
440 |
-
vector_store = []
|
441 |
-
for chunk, embedding in zip(chunks, embeddings):
|
442 |
-
vector_store.append((embedding, chunk.page_content) )
|
443 |
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
|
462 |
|
463 |
|
@@ -471,35 +471,36 @@ with column1:
|
|
471 |
|
472 |
# run=st.button("run_experiment")
|
473 |
if bg_doc:
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
with
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
|
|
503 |
|
504 |
if bg_doc and prompt:
|
505 |
query_embedding = model.encode([prompt])
|
|
|
413 |
|
414 |
|
415 |
|
416 |
+
with st.spinner('Wait for it...'):
|
417 |
+
with column1:
|
418 |
+
# Create a canvas component
|
419 |
+
changes,implementation,current=st.columns([0.01,0.9,0.01])
|
420 |
+
|
421 |
+
with implementation:
|
422 |
+
st.write("<br>"*3,unsafe_allow_html=True)
|
423 |
+
if bg_doc:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
+
# Ingest the uploaded PDF: save it to disk, split it into chunks, embed the
# chunks, and build the in-memory (embedding, text) store used for retrieval.

# The canvas widget is created only in the non-PDF branch; keep the name
# defined here so later code can still reference it.
canvas_result = None

# Persist the upload so the path-based PDF helpers can read it.
# NOTE(review): "temp.pdf" is a fixed name in the working directory -- confirm
# that concurrent sessions cannot overwrite each other's upload.
with open("temp.pdf", "wb") as f:
    f.write(bg_doc.getbuffer())

# Process the uploaded PDF file and split it into overlapping chunks.
data = process_pdf("temp.pdf")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = text_splitter.split_documents(data)

model_name = "all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

# Encode all chunks in one batched call instead of one call per chunk; the
# result is turned back into a list of per-chunk vectors. The guard keeps an
# empty document from reaching the encoder with no input.
chunk_texts = [str(chunk.page_content) for chunk in chunks]
embeddings = list(model.encode(chunk_texts)) if chunk_texts else []

# Pair every embedding with the text it was computed from.
vector_store = [
    (embedding, chunk.page_content)
    for chunk, embedding in zip(chunks, embeddings)
]
|
443 |
+
|
444 |
+
else:
|
445 |
+
|
446 |
+
|
447 |
+
|
448 |
+
canvas_result = st_canvas(
|
449 |
+
fill_color="rgba(0, 0, 0, 0.3)", # Fixed fill color with some opacity
|
450 |
+
stroke_width=stroke_width,
|
451 |
+
stroke_color=stroke_color,
|
452 |
+
background_color=bg_color,
|
453 |
+
background_image=gen_image if gen_image else Image.open("/home/user/app/ALL_image_formation/image_gen.png"),
|
454 |
+
update_streamlit=True,
|
455 |
+
height=int(screen_height//2.16) if screen_height!=1180 else screen_height//2,
|
456 |
+
width=int(screen_width//2.3) if screen_width!=820 else screen_width//2,
|
457 |
+
drawing_mode=drawing_mode,
|
458 |
+
point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
|
459 |
+
key="canvas",
|
460 |
+
)
|
461 |
|
462 |
|
463 |
|
|
|
471 |
|
472 |
# run=st.button("run_experiment")
|
473 |
if bg_doc:
|
474 |
+
with st.spinner('Wait for it...'):
    # Use the most recent prompt as the query; before any prompt has been
    # recorded, fall back to the placeholder text "something".
    prompt_history = dictionary['every_prompt_with_val']
    query_text = prompt_history[-1][0] if prompt_history else "something"
    query_embedding = model.encode([query_text])

    # Pick the text of the chunk whose embedding is most similar to the query.
    # Ranking by an explicit scalar key avoids comparing (tensor, text) tuples,
    # which silently fell back to string comparison whenever two scores tied.
    # Still raises ValueError if vector_store is empty, as before.
    retrieved_chunks = max(
        vector_store,
        key=lambda match: util.cos_sim(match[0], query_embedding).item(),
    )[-1]
|
481 |
+
|
482 |
+
|
483 |
+
|
484 |
+
# Render every page of the saved PDF inside the "implementation" column and
# outline the places where the retrieved chunk text is found.
with implementation:
    with st.spinner('Wait for it...'):
        text_lookup = retrieved_chunks
        buffer = bg_doc.getbuffer()
        byte_data = bytes(buffer)
        with fitz.open("temp.pdf") as doc:
            # Page numbers are 0-based, so range(page_count) already covers
            # every page. The previous `page_no - 1` started at -1, which
            # loaded the last page first and rotated the page order.
            pages = [doc.load_page(page_no) for page_no in range(doc.page_count)]

            # Pages are rendered while the document is still open.
            with st.container(height=int(screen_height//1.8)):
                # Display order is reversed, as in the original code.
                for pg_no in pages[::-1]:
                    # Draw a rectangle annotation around each match of the
                    # retrieved text on this page.
                    areas = pg_no.search_for(text_lookup)
                    for area in areas:
                        pg_no.add_rect_annot(area)

                    # Rasterize the annotated page and show it as an image.
                    pix = pg_no.get_pixmap(dpi=100).tobytes()
                    st.image(pix,use_container_width=True)
|
504 |
|
505 |
if bg_doc and prompt:
|
506 |
query_embedding = model.encode([prompt])
|