Spaces:
Running
Running
import streamlit as st | |
import os | |
import tempfile | |
from qdrant_client import QdrantClient, models | |
from langchain.vectorstores import Qdrant | |
from langchain.embeddings import FakeEmbeddings | |
from pathlib import Path | |
import shutil | |
# Browser-tab title/icon and wide layout for the test UI, followed by the
# on-page heading.
PAGE_SETTINGS = {
    "page_title": "FormPilot - Qdrant Dependency Test",
    "page_icon": "🧪",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

st.title("🧪 FormPilot - Qdrant Dependency Test UI")
# Sidebar: lists the test steps and reports the installed version of each
# package the app depends on.
with st.sidebar:
    st.header("Test Steps")
    st.info(
        "\n"
        "This app helps verify your Qdrant setup for FormPilot.\n"
        "1. Check package versions\n"
        "2. Test basic Qdrant functionality\n"
        "3. Test LangChain integration\n"
        "4. Test form ingestion process simulation\n"
    )
    st.markdown("---")
    st.subheader("Package Versions")
    try:
        # importlib.metadata is the standard-library way to query installed
        # distributions; unlike the deprecated pkg_resources it does not
        # require setuptools to be present in the environment.
        from importlib import metadata as importlib_metadata
    except ImportError:
        st.error("Could not import importlib.metadata")
    else:
        packages = [
            'streamlit', 'pandas', 'qdrant-client', 'langchain',
            'langchain-openai', 'openai', 'tqdm', 'python-dotenv',
        ]
        for package in packages:
            try:
                version = importlib_metadata.version(package)
            except importlib_metadata.PackageNotFoundError:
                st.error(f"{package}: Not installed")
            else:
                st.success(f"{package}: {version}")
# Main content area
st.header("1. Create Test Environment")

# Either work inside a throwaway temp directory or in a user-supplied path.
# Defines `use_temp_dir` and `qdrant_path`, which the later sections read.
DEFAULT_QDRANT_PATH = "./qdrant_test_data"
use_temp_dir = st.checkbox("Use temporary directory for tests", value=True)
if use_temp_dir:
    # Streamlit re-executes the whole script on every widget interaction.
    # Calling mkdtemp() unconditionally would hand each button click a new,
    # empty directory (and leak the previous one), so the directory is
    # created once and remembered in the session state.
    if "qdrant_temp_dir" not in st.session_state:
        st.session_state["qdrant_temp_dir"] = tempfile.mkdtemp()
    temp_dir = st.session_state["qdrant_temp_dir"]
    qdrant_path = os.path.join(temp_dir, "qdrant_test_data")
    st.info(f"Using temporary directory: {qdrant_path}")
else:
    qdrant_path = st.text_input("Enter path for Qdrant data:", value=DEFAULT_QDRANT_PATH)
    if not qdrant_path.strip():
        # os.makedirs("") raises, which would abort the whole page.
        st.warning(f"Empty path given; falling back to {DEFAULT_QDRANT_PATH}")
        qdrant_path = DEFAULT_QDRANT_PATH

# makedirs also creates missing parent directories, so this recreates the
# temp directory if the cleanup step removed it earlier in the session.
os.makedirs(qdrant_path, exist_ok=True)
# Step 2: exercise the raw qdrant-client API against a local (on-disk)
# store: create a 2-dimensional collection, upload two points, count them
# and run a nearest-neighbour search.
st.header("2. Test Basic Qdrant Functionality")
test_collection_name = "test_collection"
if st.button("Create Test Collection"):
    with st.spinner("Creating collection..."):
        client = None
        try:
            client = QdrantClient(path=qdrant_path)
            try:
                # Check if collection exists using get_collection API
                client.get_collection(collection_name=test_collection_name)
                st.info(f"Collection '{test_collection_name}' already exists")
            except Exception:
                # Collection doesn't exist, create it
                client.create_collection(
                    collection_name=test_collection_name,
                    vectors_config=models.VectorParams(size=2, distance=models.Distance.COSINE),
                )
                st.success(f"Created collection '{test_collection_name}'")

            # Insert test vectors
            vectors = [[1.0, 0.0], [0.0, 1.0]]
            payloads = [{"text": "test1"}, {"text": "test2"}]
            ids = [1, 2]
            client.upload_collection(
                collection_name=test_collection_name,
                vectors=vectors,
                payload=payloads,
                ids=ids,
            )

            # Check count
            count = client.count(test_collection_name).count
            st.success(f"Number of vectors in collection: {count}")

            # Search for similar vectors
            search_result = client.search(
                collection_name=test_collection_name,
                query_vector=[1.0, 0.0],
                limit=1,
            )
            st.json({"Search Result": [{"id": res.id, "score": res.score, "payload": res.payload} for res in search_result]})
        except Exception as e:
            st.error(f"Error: {e}")
        finally:
            # Release the local store explicitly so a later run (or the
            # cleanup step) is not blocked by a client left open here.
            if client is not None:
                client.close()
# Step 3: build a LangChain Qdrant vector store from one document using
# constant 1536-dimensional fake embeddings, then run a similarity search.
st.header("3. Test LangChain Integration")
langchain_collection = "langchain_test"
if st.button("Test LangChain + Qdrant"):
    with st.spinner("Testing LangChain integration..."):
        vectordb = None
        try:
            # Create a fake embeddings model for testing
            class TestEmbeddings(FakeEmbeddings):
                """Deterministic embeddings: every text maps to the same vector."""

                def embed_documents(self, texts):
                    # Return 1536-dim vectors (like OpenAI)
                    return [[1.0] * 1536 for _ in texts]

                def embed_query(self, text):
                    # Return 1536-dim vector
                    return [1.0] * 1536

            embeddings = TestEmbeddings(size=1536)

            # Create a new directory for this test, next to the main test data
            langchain_path = os.path.join(Path(qdrant_path).parent, "langchain_test")
            os.makedirs(langchain_path, exist_ok=True)
            st.info(f"LangChain test path: {langchain_path}")

            # Initialize Qdrant with LangChain
            try:
                from langchain.schema.document import Document

                docs = [Document(page_content="This is a test document", metadata={"source": "test"})]
                vectordb = Qdrant.from_documents(
                    documents=docs,
                    embedding=embeddings,
                    path=langchain_path,
                    collection_name=langchain_collection,
                )
                st.success("Successfully created Qdrant vector store with LangChain")

                # Test search
                results = vectordb.similarity_search("test query")
                st.json({"Search Results": [{"content": doc.page_content, "metadata": doc.metadata} for doc in results]})
            except Exception as e:
                st.error(f"LangChain integration error: {e}")
                st.error("This may indicate version incompatibility between LangChain and Qdrant")
        except Exception as e:
            st.error(f"Error: {e}")
        finally:
            # Release the on-disk store so a repeated run is not blocked by a
            # client left open here.
            # NOTE(review): assumes the vector store exposes its QdrantClient
            # as `.client` - confirm against the installed LangChain version.
            local_client = getattr(vectordb, "client", None)
            if local_client is not None:
                local_client.close()
# Step 4: mimic the form-ingestion flow with a 1536-dimensional collection:
# upload two sample payloads, verify the count and read one point back.
st.header("4. Test Form Ingestion Simulation")
if st.button("Simulate Form Ingestion"):
    with st.spinner("Simulating form ingestion process..."):
        client = None
        try:
            # Create path for this test, next to the main test data
            ingest_path = os.path.join(Path(qdrant_path).parent, "ingest_test")
            os.makedirs(ingest_path, exist_ok=True)

            # Create collection for ingestion test
            collection_name = "formpilot_test"
            client = QdrantClient(path=ingest_path)
            try:
                # Check if collection exists
                client.get_collection(collection_name=collection_name)
                st.info(f"Collection '{collection_name}' already exists")
            except Exception:
                # Collection doesn't exist, create it
                client.create_collection(
                    collection_name=collection_name,
                    vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
                )
                st.success(f"Created collection '{collection_name}'")

            # Simulate adding vectors similar to ingest process
            test_vectors = [
                [0.1] * 1536,  # Simplified vector
                [0.2] * 1536,
            ]
            test_payloads = [
                {"text": "I-485 form instructions page 1", "source": "I-485-instr.pdf:page-1", "form": "I-485"},
                {"text": "I-485 form instructions page 2", "source": "I-485-instr.pdf:page-2", "form": "I-485"},
            ]
            test_ids = [1, 2]
            client.upload_collection(
                collection_name=collection_name,
                vectors=test_vectors,
                payload=test_payloads,
                ids=test_ids,
                batch_size=64,
            )

            # Verify count
            count = client.count(collection_name).count
            st.success(f"Number of vectors in test collection: {count}")

            # Retrieve a point to confirm structure
            point = client.retrieve(collection_name, ids=[1])
            if point:
                st.json({"Retrieved Point": {"id": point[0].id, "payload": point[0].payload}})
            else:
                st.error("Could not retrieve point")
        except Exception as e:
            st.error(f"Error: {e}")
        finally:
            # Release the local store explicitly so a later run (or the
            # cleanup step) is not blocked by a client left open here.
            if client is not None:
                client.close()
# Step 5: delete whatever the tests wrote to disk. In temp-dir mode the whole
# parent of the Qdrant path is removed; otherwise the Qdrant directory and
# its two sibling test directories are removed individually when present.
st.header("5. Cleanup")
if st.button("Clean Up Test Directories"):
    with st.spinner("Cleaning up..."):
        try:
            parent_dir = Path(qdrant_path).parent
            if use_temp_dir:
                shutil.rmtree(parent_dir)
                st.success(f"Removed temp directory: {parent_dir}")
            else:
                sibling_dirs = [
                    os.path.join(parent_dir, dir_name)
                    for dir_name in ("langchain_test", "ingest_test")
                ]
                for target in [qdrant_path, *sibling_dirs]:
                    if not os.path.exists(target):
                        continue
                    shutil.rmtree(target)
                    st.success(f"Removed directory: {target}")
        except Exception as e:
            st.error(f"Error during cleanup: {e}")
# Step 6: static closing notes covering common failure causes and suggested
# version pins, rendered as Markdown.
st.header("6. Summary")
summary_markdown = """
If all tests passed successfully, your Qdrant and LangChain dependencies are correctly configured for FormPilot.
### Common Issues:
1. **Version Incompatibilities**: Ensure you're using compatible versions of Qdrant client and LangChain.
2. **API Changes**: The LangChain API has changed significantly in recent versions. Your code may need updates.
3. **Missing Dependencies**: Make sure all required packages are installed.
### Recommendations:
Based on your code analysis, consider using these pinned versions:
```
qdrant-client==1.7.3
langchain==0.1.12
langchain-openai==0.1.0
```
Check the notebook for a more detailed dependency test if you need it.
"""
st.markdown(summary_markdown)