Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import faiss | |
import torch | |
import os | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
from huggingface_hub import hf_hub_download | |
# --- Environment and page setup ---------------------------------------------
# Point Hugging Face and Streamlit at writable scratch directories to avoid
# permission issues on hosts where the default locations are read-only.
# NOTE(review): huggingface_hub appears to read HF_HOME when it is first
# imported, and the imports above run before this assignment -- confirm the
# variable is actually honoured, or move it above the imports.
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['STREAMLIT_SERVER_ROOT'] = '/tmp/streamlit'

# Create the scratch directories if they don't exist yet.
os.makedirs('/tmp/huggingface', exist_ok=True)
os.makedirs('/tmp/streamlit', exist_ok=True)

# Hugging Face dataset repository the app downloads its artefacts from.
REPO_ID = "uiuxarghya/intelliflix-store"
# Fallback image used when a movie has no poster path.
default_poster = "https://i.ibb.co/pHxqDX6/2ebfe3fcf82a4c6ccac494de2306a357.jpg"

# Page configuration. The icon and title emoji were mis-encoded ("π¬" is the
# UTF-8 byte sequence of the clapper-board emoji decoded with the wrong
# charset); they are restored here so the favicon is a valid single emoji.
st.set_page_config(
    page_title="IntelliFlix: Semantic Movie Recommender",
    page_icon="🎬",
    layout="centered",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://github.com/uiuxarghya/intelliflix/issues',
        'Report a bug': 'https://github.com/uiuxarghya/intelliflix/issues',
        'About': (
            "IntelliFlix is a powerful movie recommender built with Sentence Transformers "
            "and FAISS. It helps you find similar movies based on plot descriptions. Built by "
            "[Arghya Ghosh](https://arghya.dev). Try out the app and discover your next favorite movie!"
        ),
    },
)

st.title("🎬 IntelliFlix: Semantic Movie Recommender")
st.markdown("Find similar movies based on plot descriptions using Sentence Transformers + FAISS.")
def _fetch_dataset_file(filename):
    """Download *filename* from the REPO_ID dataset repo and return its local path."""
    return hf_hub_download(
        repo_id=REPO_ID,
        filename=filename,
        repo_type="dataset",
        cache_dir='/tmp/huggingface',
    )


@st.cache_resource(show_spinner=False)
def _load_model():
    """Instantiate the sentence-embedding model (cached across script reruns)."""
    return SentenceTransformer(
        "sentence-transformers/all-MiniLM-L12-v2",
        cache_folder='/tmp/huggingface',
    )


@st.cache_resource(show_spinner=False)
def _load_movies():
    """Read the processed movie CSV into a DataFrame (cached across script reruns)."""
    return pd.read_csv(_fetch_dataset_file("data/tmdb_movies_dataset_processed.csv"))


@st.cache_resource(show_spinner=False)
def _load_embeddings():
    """Load the precomputed overview embeddings (cached across script reruns)."""
    # NOTE(review): "ovierview" looks like a typo, but it presumably matches
    # the file name stored in the dataset repo -- confirm before renaming.
    return np.load(_fetch_dataset_file("embeddings/movie_ovierview_embeddings.npy"))


@st.cache_resource(show_spinner=False)
def _load_index():
    """Read the FAISS index from disk (cached across script reruns)."""
    return faiss.read_index(_fetch_dataset_file("indexes/movie_overview_index.faiss"))


def load_model_and_data():
    """Load the embedding model, movie table, embeddings and FAISS index.

    Streamlit re-executes the whole script on every widget interaction, so
    each heavy artefact is loaded through an ``st.cache_resource`` helper and
    is built once instead of on every rerun. The helpers contain no Streamlit
    UI calls; the spinners are shown here.

    Returns:
        A ``(model, df, embeddings, index)`` tuple.

    On any failure an error message is rendered and the script run is
    halted with ``st.stop()``; the function does not return in that case.
    """
    try:
        model = _load_model()
        with st.spinner("Loading movie data..."):
            df = _load_movies()
        with st.spinner("Loading embeddings..."):
            embeddings = _load_embeddings()
        with st.spinner("Loading FAISS index..."):
            index = _load_index()
        return model, df, embeddings, index
    except Exception as e:
        # UI boundary: surface the problem to the user rather than a traceback.
        st.error(f"Error loading model or data: {str(e)}")
        st.stop()
# Module-level state shared by the search function and the UI below.
model, df, embeddings, index = load_model_and_data()

# Run query encoding on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def semantic_search(query, k=15):
    """Return the movies whose overviews are most similar to *query*.

    The query is encoded with the module-level ``model``, the ``k`` nearest
    neighbours are looked up in the FAISS ``index``, and the matching rows
    of ``df`` are returned with two extra columns:

    * ``similarity`` -- cosine similarity between the query vector and the
      stored embedding of the movie;
    * ``poster_url`` -- TMDB image URL, or ``default_poster`` when the row
      has no ``poster_path``.

    Args:
        query: Free-text plot or description.
        k: Maximum number of neighbours to retrieve.

    Returns:
        A DataFrame sorted by descending similarity with a fresh 0..n-1
        index. An empty DataFrame is returned when nothing is found or when
        an error occurs (the error is also shown in the UI).
    """
    try:
        query_vec = model.encode([query], convert_to_tensor=True, device=device)
        query_np = query_vec.cpu().numpy().astype("float32")

        # Distances are not needed: similarity is recomputed as cosine below.
        _, neighbor_ids = index.search(query_np, k)

        # FAISS pads the label array with -1 when it finds fewer than k
        # neighbours. Used as an index, -1 would silently select the LAST
        # movie/embedding, so drop those placeholders first.
        hits = neighbor_ids[0]
        hits = hits[hits >= 0]
        if hits.size == 0:
            return pd.DataFrame()

        results = df.iloc[hits].copy()
        results["similarity"] = cosine_similarity(query_np, embeddings[hits])[0]
        results["poster_url"] = results["poster_path"].apply(
            lambda path: f"https://image.tmdb.org/t/p/w500{path}" if pd.notnull(path) else default_poster
        )
        return results.sort_values(by="similarity", ascending=False).reset_index(drop=True)
    except Exception as e:
        # UI boundary: report the failure and let the caller show "no results".
        st.error(f"Error during search: {str(e)}")
        return pd.DataFrame()
# Main UI
# NOTE(review): the leading "π" in the text-input label and in the results
# sub-header is a mis-encoded emoji whose original glyph cannot be recovered
# from the garbled bytes; both strings are kept verbatim -- restore the
# intended emoji from the upstream source.
query = st.text_input(
    "π Enter a movie plot or description:",
    "An adventure of explorers lost in space for a wormhole and tries to survive on a distant planet.",
    help="Describe a movie plot or theme to find similar movies"
).strip()

if query:
    with st.spinner("Finding similar movies..."):
        results = semantic_search(query)

    if not results.empty:
        st.subheader(f"π Top {len(results)} similar movies:")
        # Lay the posters out in a three-column grid, filling row by row.
        cols = st.columns(3)
        for idx, (_, row) in enumerate(results.iterrows()):
            # A missing release date is read as NaN (a float); slicing it
            # would raise TypeError, so only append the year when a date
            # is actually present.
            release_date = row["release_date"]
            if pd.notnull(release_date):
                caption = f"{row['title']} ({str(release_date)[:4]})"
            else:
                caption = f"{row['title']}"

            with cols[idx % 3]:
                st.image(
                    row["poster_url"],
                    width=200,
                    caption=caption
                )
                with st.expander(f"Similarity: {row['similarity']:.2f}"):
                    st.write(row['overview'])
    else:
        st.warning("No results found. Try a different query.")

# Footer
st.markdown("---")
# The heart emoji was mis-encoded as "β€οΈ" (UTF-8 bytes decoded with the wrong
# charset); restored here.
st.markdown("""
**Built with** ❤️ **by [Arghya Ghosh](https://arghya.dev)**
*Technologies used: FAISS + Sentence Transformers + Streamlit*
""")