Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files
- src/streamlit_app.py +26 -32
src/streamlit_app.py
CHANGED
|
@@ -175,19 +175,13 @@ if 'chunks' not in st.session_state:
|
|
| 175 |
if 'response' not in st.session_state:
|
| 176 |
st.session_state.response=''
|
| 177 |
# Sidebar document upload
|
| 178 |
-
|
| 179 |
-
uploaded_file = st.sidebar.file_uploader(
|
| 180 |
-
"Upload your document 📄",
|
| 181 |
-
type=["pdf"],
|
| 182 |
-
label_visibility="collapsed"
|
| 183 |
-
)
|
| 184 |
-
upload_button=st.sidebar.button("Upload Document")
|
| 185 |
uploaded_file = st.sidebar.file_uploader(
|
| 186 |
"Upload your PDF",
|
| 187 |
type=["pdf"],
|
| 188 |
key="pdf_uploader",
|
| 189 |
)
|
| 190 |
-
|
| 191 |
def extract_pdf_text_from_bytes(file_bytes: bytes) -> str:
|
| 192 |
reader = PdfReader(io.BytesIO(file_bytes))
|
| 193 |
pages_text = []
|
|
@@ -195,30 +189,30 @@ def extract_pdf_text_from_bytes(file_bytes: bytes) -> str:
|
|
| 195 |
txt = p.extract_text() or ""
|
| 196 |
pages_text.append(txt)
|
| 197 |
return "\n".join(pages_text)
|
| 198 |
-
|
| 199 |
-
if uploaded_file is not None:
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
|
| 223 |
st.sidebar.write("Before making the your faviorate charecter sound, authenicate your code")
|
| 224 |
Authenication=st.sidebar.button('Authenicate')
|
|
|
|
| 175 |
if 'response' not in st.session_state:
|
| 176 |
st.session_state.response=''
|
| 177 |
# Sidebar document upload
|
| 178 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
uploaded_file = st.sidebar.file_uploader(
|
| 180 |
"Upload your PDF",
|
| 181 |
type=["pdf"],
|
| 182 |
key="pdf_uploader",
|
| 183 |
)
|
| 184 |
+
upload_button=st.sidebar.button("Uploading your document 📄")
|
| 185 |
def extract_pdf_text_from_bytes(file_bytes: bytes) -> str:
|
| 186 |
reader = PdfReader(io.BytesIO(file_bytes))
|
| 187 |
pages_text = []
|
|
|
|
| 189 |
txt = p.extract_text() or ""
|
| 190 |
pages_text.append(txt)
|
| 191 |
return "\n".join(pages_text)
|
| 192 |
+
if upload_button:
|
| 193 |
+
if uploaded_file is not None:
|
| 194 |
+
with st.spinner("Reading & embedding your PDF..."):
|
| 195 |
+
# Important: read bytes once on this rerun
|
| 196 |
+
file_bytes = uploaded_file.read()
|
| 197 |
+
# (Optional) if you ever re-use uploaded_file later, do: uploaded_file.seek(0)
|
| 198 |
+
|
| 199 |
+
# Extract text purely in-memory (no /tmp files, no PyPDFLoader)
|
| 200 |
+
file_text = extract_pdf_text_from_bytes(file_bytes)
|
| 201 |
+
|
| 202 |
+
# Persist to session state
|
| 203 |
+
st.session_state.file_text = file_text
|
| 204 |
+
|
| 205 |
+
# Build embeddings (uses your existing text_splitter + encoder)
|
| 206 |
+
chunks = text_splitter.split_text(file_text)
|
| 207 |
+
embeddings = st.session_state.encoder.encode(
|
| 208 |
+
chunks, convert_to_tensor=True, show_progress_bar=True
|
| 209 |
+
).cpu().numpy()
|
| 210 |
+
|
| 211 |
+
st.session_state.embeddings = embeddings
|
| 212 |
+
st.session_state.chunks = chunks
|
| 213 |
+
st.session_state.doc_flag = True
|
| 214 |
+
|
| 215 |
+
st.success(f"Loaded: {uploaded_file.name} — {len(st.session_state.chunks)} chunks")
|
| 216 |
|
| 217 |
st.sidebar.write("Before making the your faviorate charecter sound, authenicate your code")
|
| 218 |
Authenication=st.sidebar.button('Authenicate')
|