File size: 5,014 Bytes
6fe943b
7a16b87
 
1e5b08d
7a16b87
 
 
 
 
6fe943b
7a16b87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import streamlit as st
import pandas as pd
import numpy as np
import faiss
import torch
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from huggingface_hub import hf_hub_download

# Configure environment paths to avoid permission issues
# (hosted platforms often mount the default home directory read-only,
# so point Hugging Face and Streamlit caches at writable /tmp paths).
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['STREAMLIT_SERVER_ROOT'] = '/tmp/streamlit'

# Hugging Face dataset repo that holds the movie CSV, the precomputed
# overview embeddings, and the FAISS index loaded below.
REPO_ID = "uiuxarghya/intelliflix-store"
# Fallback image used when a movie row has no TMDB poster_path.
default_poster = "https://i.ibb.co/pHxqDX6/2ebfe3fcf82a4c6ccac494de2306a357.jpg"

# Streamlit page setup: browser-tab title/icon, layout, and help-menu links.
_ABOUT_TEXT = (
    "IntelliFlix is a powerful movie recommender built with Sentence Transformers "
    "and FAISS. It helps you find similar movies based on plot descriptions. Built by "
    "[Arghya Ghosh](https://arghya.dev). Try out the app and discover your next favorite movie!"
)
st.set_page_config(
    page_title="IntelliFlix: Semantic Movie Recommender",
    page_icon="🎬",
    layout="centered",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://github.com/uiuxarghya/intelliflix/issues',
        'Report a bug': 'https://github.com/uiuxarghya/intelliflix/issues',
        'About': _ABOUT_TEXT,
    },
)

# Ensure the /tmp scratch directories configured via the environment exist.
for _scratch_dir in ('/tmp/huggingface', '/tmp/streamlit'):
    os.makedirs(_scratch_dir, exist_ok=True)

st.title("🎬 IntelliFlix: Semantic Movie Recommender")
st.markdown("Find similar movies based on plot descriptions using Sentence Transformers + FAISS.")

@st.cache_resource(show_spinner=False)
def load_model_and_data():
    """Download and cache the encoder, movie table, embeddings, and FAISS index.

    Wrapped in ``st.cache_resource`` so the heavy downloads run once per
    Streamlit server process.  Any failure is surfaced in the UI via
    ``st.error`` and the script run is halted with ``st.stop``.

    Returns:
        tuple: ``(model, df, embeddings, index)`` — the SentenceTransformer
        encoder, the movie DataFrame, the embedding matrix, and the FAISS index.
    """
    def _fetch(filename):
        # All three artefacts live in the same HF dataset repo and
        # share one cache directory; only the filename varies.
        return hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            cache_dir='/tmp/huggingface'
        )

    try:
        # Sentence encoder used to embed the user's free-text query.
        model = SentenceTransformer(
            "sentence-transformers/all-MiniLM-L12-v2",
            cache_folder='/tmp/huggingface'
        )

        with st.spinner("Loading movie data..."):
            df = pd.read_csv(_fetch("data/tmdb_movies_dataset_processed.csv"))

        with st.spinner("Loading embeddings..."):
            # NOTE(review): "ovierview" looks like a typo, but it must match the
            # actual filename in the upstream repo — do not "fix" without checking.
            embeddings = np.load(_fetch("embeddings/movie_ovierview_embeddings.npy"))

        with st.spinner("Loading FAISS index..."):
            index = faiss.read_index(_fetch("indexes/movie_overview_index.faiss"))

        return model, df, embeddings, index

    except Exception as e:
        st.error(f"Error loading model or data: {str(e)}")
        st.stop()

# Load everything once at import time; encode queries on GPU when available.
model, df, embeddings, index = load_model_and_data()
device = "cuda" if torch.cuda.is_available() else "cpu"

def semantic_search(query, k=15):
    """Return the top-``k`` movies whose plot embeddings best match ``query``.

    Args:
        query (str): Free-text plot description to search for.
        k (int): Number of nearest neighbours to request from the FAISS index.

    Returns:
        pandas.DataFrame: Matching rows of ``df`` sorted by cosine similarity
        (descending) with added ``similarity`` and ``poster_url`` columns.
        Empty DataFrame on error or when the index returns no valid hits.
    """
    try:
        query_vec = model.encode([query], convert_to_tensor=True, device=device)
        query_np = query_vec.cpu().numpy().astype("float32")

        D, I = index.search(query_np, k)
        # FAISS pads results with -1 when the index holds fewer than k
        # vectors; drop those ids so we don't silently pick up
        # df.iloc[-1] / embeddings[-1] (the last row) as a fake match.
        hits = I[0][I[0] != -1]
        if hits.size == 0:
            return pd.DataFrame()

        # Re-rank by exact cosine similarity against the stored embeddings
        # (the index's internal distance metric may differ, e.g. raw L2).
        similarities = cosine_similarity(query_np, embeddings[hits])[0]

        results = df.iloc[hits].copy()
        results["similarity"] = similarities
        results["poster_url"] = results["poster_path"].apply(
            lambda path: f"https://image.tmdb.org/t/p/w500{path}" if pd.notnull(path) else default_poster
        )
        return results.sort_values(by="similarity", ascending=False).reset_index(drop=True)
    except Exception as e:
        st.error(f"Error during search: {str(e)}")
        return pd.DataFrame()

# Main UI: query box, search, and a 3-wide grid of poster result cards.
# (Restores the 🔍 glyphs that were mojibake-damaged to "πŸ”".)
query = st.text_input(
    "🔍 Enter a movie plot or description:",
    "An adventure of explorers lost in space for a wormhole and tries to survive on a distant planet.",
    help="Describe a movie plot or theme to find similar movies"
).strip()

if query:
    with st.spinner("Finding similar movies..."):
        results = semantic_search(query)

    if not results.empty:
        st.subheader(f"🔍 Top {len(results)} similar movies:")

        cols = st.columns(3)
        for idx, (_, row) in enumerate(results.iterrows()):
            with cols[idx % 3]:
                # Guard against missing release dates (NaN floats in the CSV),
                # which would crash a bare row['release_date'][:4] slice.
                release = row.get("release_date")
                year = str(release)[:4] if pd.notnull(release) else "N/A"
                st.image(
                    row["poster_url"],
                    width=200,
                    caption=f"{row['title']} ({year})"
                )
                with st.expander(f"Similarity: {row['similarity']:.2f}"):
                    st.write(row['overview'])
    else:
        st.warning("No results found. Try a different query.")

# Footer (restores the ❤️ glyph that was mojibake-damaged to "❀️").
st.markdown("---")
st.markdown("""
    **Built with** ❤️ **by [Arghya Ghosh](https://arghya.dev)**
    *Technologies used: FAISS + Sentence Transformers + Streamlit*
""")