File size: 5,014 Bytes
6fe943b
7a16b87
 
1e5b08d
7a16b87
 
 
 
 
6fe943b
7a16b87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import streamlit as st
import pandas as pd
import numpy as np
import faiss
import torch
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from huggingface_hub import hf_hub_download

# Configure environment paths to avoid permission issues
# (hosted platforms often mount the default home directory read-only,
# so point Hugging Face and Streamlit caches at writable /tmp paths).
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['STREAMLIT_SERVER_ROOT'] = '/tmp/streamlit'

# Hugging Face dataset repo that holds the movie CSV, the precomputed
# overview embeddings, and the FAISS index loaded below.
REPO_ID = "uiuxarghya/intelliflix-store"
# Fallback image used when a movie row has no TMDB poster_path.
default_poster = "https://i.ibb.co/pHxqDX6/2ebfe3fcf82a4c6ccac494de2306a357.jpg"

# Streamlit page setup: browser-tab title/icon, layout, and help-menu links.
_ABOUT_TEXT = (
    "IntelliFlix is a powerful movie recommender built with Sentence Transformers "
    "and FAISS. It helps you find similar movies based on plot descriptions. Built by "
    "[Arghya Ghosh](https://arghya.dev). Try out the app and discover your next favorite movie!"
)
st.set_page_config(
    page_title="IntelliFlix: Semantic Movie Recommender",
    page_icon="🎬",
    layout="centered",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://github.com/uiuxarghya/intelliflix/issues',
        'Report a bug': 'https://github.com/uiuxarghya/intelliflix/issues',
        'About': _ABOUT_TEXT,
    },
)

# Ensure the /tmp scratch directories configured via the environment exist.
for _scratch_dir in ('/tmp/huggingface', '/tmp/streamlit'):
    os.makedirs(_scratch_dir, exist_ok=True)

st.title("🎬 IntelliFlix: Semantic Movie Recommender")
st.markdown("Find similar movies based on plot descriptions using Sentence Transformers + FAISS.")

@st.cache_resource(show_spinner=False)
def load_model_and_data():
    """Download and cache the encoder, movie table, embeddings, and FAISS index.

    Wrapped in ``st.cache_resource`` so the heavy downloads run once per
    Streamlit server process.  Any failure is surfaced in the UI via
    ``st.error`` and the script run is halted with ``st.stop``.

    Returns:
        tuple: ``(model, df, embeddings, index)`` — the SentenceTransformer
        encoder, the movie DataFrame, the embedding matrix, and the FAISS index.
    """
    def _fetch(filename):
        # All three artefacts live in the same HF dataset repo and
        # share one cache directory; only the filename varies.
        return hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            cache_dir='/tmp/huggingface'
        )

    try:
        # Sentence encoder used to embed the user's free-text query.
        model = SentenceTransformer(
            "sentence-transformers/all-MiniLM-L12-v2",
            cache_folder='/tmp/huggingface'
        )

        with st.spinner("Loading movie data..."):
            df = pd.read_csv(_fetch("data/tmdb_movies_dataset_processed.csv"))

        with st.spinner("Loading embeddings..."):
            # NOTE(review): "ovierview" looks like a typo, but it must match the
            # actual filename in the upstream repo — do not "fix" without checking.
            embeddings = np.load(_fetch("embeddings/movie_ovierview_embeddings.npy"))

        with st.spinner("Loading FAISS index..."):
            index = faiss.read_index(_fetch("indexes/movie_overview_index.faiss"))

        return model, df, embeddings, index

    except Exception as e:
        st.error(f"Error loading model or data: {str(e)}")
        st.stop()

# Load everything once at import time; encode queries on GPU when available.
model, df, embeddings, index = load_model_and_data()
device = "cuda" if torch.cuda.is_available() else "cpu"

def semantic_search(query, k=15):
    """Return the top-``k`` movies whose plot embeddings best match ``query``.

    Args:
        query (str): Free-text plot description to search for.
        k (int): Number of nearest neighbours to request from the FAISS index.

    Returns:
        pandas.DataFrame: Matching rows of ``df`` sorted by cosine similarity
        (descending) with added ``similarity`` and ``poster_url`` columns.
        Empty DataFrame on error or when the index returns no valid hits.
    """
    try:
        query_vec = model.encode([query], convert_to_tensor=True, device=device)
        query_np = query_vec.cpu().numpy().astype("float32")

        D, I = index.search(query_np, k)
        # FAISS pads results with -1 when the index holds fewer than k
        # vectors; drop those ids so we don't silently pick up
        # df.iloc[-1] / embeddings[-1] (the last row) as a fake match.
        hits = I[0][I[0] != -1]
        if hits.size == 0:
            return pd.DataFrame()

        # Re-rank by exact cosine similarity against the stored embeddings
        # (the index's internal distance metric may differ, e.g. raw L2).
        similarities = cosine_similarity(query_np, embeddings[hits])[0]

        results = df.iloc[hits].copy()
        results["similarity"] = similarities
        results["poster_url"] = results["poster_path"].apply(
            lambda path: f"https://image.tmdb.org/t/p/w500{path}" if pd.notnull(path) else default_poster
        )
        return results.sort_values(by="similarity", ascending=False).reset_index(drop=True)
    except Exception as e:
        st.error(f"Error during search: {str(e)}")
        return pd.DataFrame()

# Main UI: query box, search, and a 3-wide grid of poster result cards.
# (Restores the 🔍 glyphs that were mojibake-damaged to "πŸ”".)
query = st.text_input(
    "🔍 Enter a movie plot or description:",
    "An adventure of explorers lost in space for a wormhole and tries to survive on a distant planet.",
    help="Describe a movie plot or theme to find similar movies"
).strip()

if query:
    with st.spinner("Finding similar movies..."):
        results = semantic_search(query)

    if not results.empty:
        st.subheader(f"🔍 Top {len(results)} similar movies:")

        cols = st.columns(3)
        for idx, (_, row) in enumerate(results.iterrows()):
            with cols[idx % 3]:
                # Guard against missing release dates (NaN floats in the CSV),
                # which would crash a bare row['release_date'][:4] slice.
                release = row.get("release_date")
                year = str(release)[:4] if pd.notnull(release) else "N/A"
                st.image(
                    row["poster_url"],
                    width=200,
                    caption=f"{row['title']} ({year})"
                )
                with st.expander(f"Similarity: {row['similarity']:.2f}"):
                    st.write(row['overview'])
    else:
        st.warning("No results found. Try a different query.")

# Footer (restores the ❤️ glyph that was mojibake-damaged to "❀️").
st.markdown("---")
st.markdown("""
    **Built with** ❤️ **by [Arghya Ghosh](https://arghya.dev)**
    *Technologies used: FAISS + Sentence Transformers + Streamlit*
""")