# intelliflix / app.py
# uiuxarghya's picture
# fix(app): add FAISS library import for enhanced semantic search functionality
# 1e5b08d verified
import streamlit as st
import pandas as pd
import numpy as np
import faiss
import torch
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from huggingface_hub import hf_hub_download
# Redirect Hugging Face and Streamlit paths to /tmp to avoid permission issues.
# NOTE(review): huggingface_hub is already imported above this point, so
# HF_HOME may be read too late to take effect -- confirm. The downloads in
# load_model_and_data pass explicit cache directories regardless.
os.environ.update(
    {
        "HF_HOME": "/tmp/huggingface",
        "STREAMLIT_SERVER_ROOT": "/tmp/streamlit",
    }
)

# Hugging Face Hub repository the CSV, embeddings and FAISS index are pulled from.
REPO_ID = "uiuxarghya/intelliflix-store"

# Poster image used when a movie row has no poster_path.
default_poster = "https://i.ibb.co/pHxqDX6/2ebfe3fcf82a4c6ccac494de2306a357.jpg"
# Streamlit page configuration: title, icon, layout and the entries of the
# built-in menu. "Get Help" and "Report a bug" point at the same issues page.
_ISSUES_URL = 'https://github.com/uiuxarghya/intelliflix/issues'
_ABOUT_TEXT = (
    "IntelliFlix is a powerful movie recommender built with Sentence Transformers "
    "and FAISS. It helps you find similar movies based on plot descriptions. Built by "
    "[Arghya Ghosh](https://arghya.dev). Try out the app and discover your next favorite movie!"
)

st.set_page_config(
    page_title="IntelliFlix: Semantic Movie Recommender",
    page_icon="🎬",
    layout="centered",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': _ISSUES_URL,
        'Report a bug': _ISSUES_URL,
        'About': _ABOUT_TEXT,
    },
)
# Create the /tmp directories used above if they do not exist yet.
for _tmp_dir in ('/tmp/huggingface', '/tmp/streamlit'):
    os.makedirs(_tmp_dir, exist_ok=True)

# Page heading followed by a one-line description of the app.
st.title("🎬 IntelliFlix: Semantic Movie Recommender")
st.markdown("Find similar movies based on plot descriptions using Sentence Transformers + FAISS.")
@st.cache_resource(show_spinner=False)
def load_model_and_data():
    """Load the sentence-embedding model and the movie artifacts.

    The model is created with ``/tmp/huggingface`` as its cache folder. The
    CSV, the ``.npy`` embeddings file and the FAISS index are downloaded from
    the ``REPO_ID`` dataset repository into the same directory and then read
    with ``pd.read_csv``, ``np.load`` and ``faiss.read_index`` respectively.

    Returns:
        A ``(model, df, embeddings, index)`` tuple.

    If anything raises, the message is shown with ``st.error`` and
    ``st.stop()`` is called instead of returning.
    """
    cache_root = '/tmp/huggingface'

    def fetch(filename):
        # Download one file from the dataset repo and return its local path.
        return hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            cache_dir=cache_root,
        )

    try:
        # Load model with explicit cache directory
        model = SentenceTransformer(
            "sentence-transformers/all-MiniLM-L12-v2",
            cache_folder=cache_root,
        )

        with st.spinner("Loading movie data..."):
            df = pd.read_csv(fetch("data/tmdb_movies_dataset_processed.csv"))

        with st.spinner("Loading embeddings..."):
            # The spelling "ovierview" is the name requested from the Hub;
            # it must match the remote file, so it is kept verbatim.
            embeddings = np.load(fetch("embeddings/movie_ovierview_embeddings.npy"))

        with st.spinner("Loading FAISS index..."):
            index = faiss.read_index(fetch("indexes/movie_overview_index.faiss"))
    except Exception as exc:
        st.error(f"Error loading model or data: {exc}")
        st.stop()
    else:
        return model, df, embeddings, index
# Module-level handles used by semantic_search.
model, df, embeddings, index = load_model_and_data()

# Encode queries on CUDA when torch reports it as available, else on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def semantic_search(query, k=15):
    """Return the movies most similar to ``query``, best match first.

    The query is encoded with the module-level ``model``, the FAISS ``index``
    is asked for ``k`` neighbours, and cosine similarity between the query
    vector and the matching rows of ``embeddings`` is attached to the
    corresponding rows of ``df``.

    Args:
        query: Free-text description to search for.
        k: Number of neighbours requested from the index.

    Returns:
        A copy of the matching ``df`` rows with two extra columns,
        ``similarity`` and ``poster_url``, sorted by ``similarity`` in
        descending order with a fresh index. An empty DataFrame is returned
        when the index yields no valid neighbour or when an error occurs (the
        error is also reported through ``st.error``).
    """
    try:
        query_vec = model.encode([query], convert_to_tensor=True, device=device)
        query_np = query_vec.cpu().numpy().astype("float32")

        _, neighbor_ids = index.search(query_np, k)

        # FAISS fills the id list with -1 when fewer than k neighbours exist.
        # Used as a positional index, -1 would silently pick the last row, so
        # drop those placeholders before touching embeddings or df.
        hit_ids = neighbor_ids[0]
        hit_ids = hit_ids[hit_ids >= 0]
        if hit_ids.size == 0:
            return pd.DataFrame()

        results = df.iloc[hit_ids].copy()
        results["similarity"] = cosine_similarity(query_np, embeddings[hit_ids])[0]
        results["poster_url"] = results["poster_path"].apply(
            lambda path: f"https://image.tmdb.org/t/p/w500{path}" if pd.notnull(path) else default_poster
        )
        return results.sort_values(by="similarity", ascending=False).reset_index(drop=True)
    except Exception as e:
        st.error(f"Error during search: {str(e)}")
        return pd.DataFrame()
# Main UI: read a plot description, run the search and show the hits as a
# three-column poster grid.
query = st.text_input(
    "🔍 Enter a movie plot or description:",
    "An adventure of explorers lost in space for a wormhole and tries to survive on a distant planet.",
    help="Describe a movie plot or theme to find similar movies",
).strip()

if query:
    with st.spinner("Finding similar movies..."):
        results = semantic_search(query)

    if not results.empty:
        st.subheader(f"🔍 Top {len(results)} similar movies:")
        cols = st.columns(3)
        for idx, (_, row) in enumerate(results.iterrows()):
            # An empty release_date cell is read from the CSV as NaN (a float),
            # which cannot be sliced; fall back to a placeholder in that case.
            release_date = row["release_date"]
            year = str(release_date)[:4] if pd.notnull(release_date) else "N/A"

            # Fill the grid row by row: hit 0 -> column 0, hit 1 -> column 1, ...
            with cols[idx % 3]:
                st.image(
                    row["poster_url"],
                    width=200,
                    caption=f"{row['title']} ({year})",
                )
                with st.expander(f"Similarity: {row['similarity']:.2f}"):
                    st.write(row['overview'])
    else:
        st.warning("No results found. Try a different query.")
# Footer: a horizontal rule followed by the credits line.
st.markdown("---")
st.markdown("""
**Built with** ❤️ **by [Arghya Ghosh](https://arghya.dev)**
*Technologies used: FAISS + Sentence Transformers + Streamlit*
""")