"""Gradio app that searches an index of Hugging Face public zip files for RVC models.

On import it downloads a CSV index, normalizes it, loads it into an in-memory
SQLite database, and exposes a keyword search UI whose results are rendered
as an HTML table (whole-word matches first).
"""

import ast
import html
import re
import sqlite3
import unicodedata

import gradio as gr
import pandas as pd
import requests

database_url = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
database_path = "database.csv"
description = "This app digs through Hugging Face’s public zip files hunting for RVC models… and occasionally brings back random stuff that has nothing to do with them. Don’t worry though—the best RVC matches are always shown first, because we like to pretend we’re organized."


def clean_file_url(val):
    """Normalize a PARSED_URL cell to a plain comma-separated string.

    Cells may be a real list, a string that looks like a Python list literal
    (e.g. '["a","b"]'), NaN, or anything else (passed through as ``str``).
    """
    # Check for list FIRST: pd.isna(list) returns an ndarray whose truth
    # value raises ValueError, so the isna test must not see lists.
    if isinstance(val, list):
        return ", ".join(map(str, val))
    if pd.isna(val):
        return ""
    stripped = val.strip() if isinstance(val, str) else ""
    if stripped.startswith("[") and stripped.endswith("]"):
        try:
            parsed = ast.literal_eval(val)
        except (ValueError, SyntaxError):
            return val  # not a valid literal: leave as-is
        if isinstance(parsed, list):
            return ", ".join(map(str, parsed))
    return str(val)


def normalize(text: str) -> str:
    """Lower-case *text*, strip accents, and turn separator chars into spaces."""
    if pd.isna(text):
        return ""
    text = str(text).lower()
    # Drop combining marks left after NFD decomposition (accent removal).
    text = "".join(
        c for c in unicodedata.normalize("NFD", text)
        if unicodedata.category(c) != "Mn"
    )
    # Treat common filename separators as word boundaries.
    return re.sub(r"[+()\-_/.]", " ", text)


def search_files(query: str):
    """Return an HTML results table for *query* (or a DataFrame for an empty query).

    Every normalized keyword must appear as a substring of FILENAME_NORM;
    rows where every keyword also matches as a whole word sort first.
    Results are capped at 250 rows.
    """
    if not query.strip():
        return pd.DataFrame([{"Result": "Empty query"}])

    keywords = normalize(query).split()

    # Build the statement with ? placeholders so user input never reaches the
    # SQL text (normalize() does not strip quotes, so interpolation would be
    # both breakable and injectable).
    whole_clause = " AND ".join(
        "(FILENAME_NORM LIKE ? OR FILENAME_NORM LIKE ?"
        " OR FILENAME_NORM LIKE ? OR FILENAME_NORM = ?)"
        for _ in keywords
    )
    whole_params = [p for k in keywords for p in (f"% {k} %", f"{k} %", f"% {k}", k)]

    partial_clause = " AND ".join("FILENAME_NORM LIKE ?" for _ in keywords)
    partial_params = [f"%{k}%" for k in keywords]

    sql = f"""
        SELECT *,
               CASE WHEN {whole_clause} THEN 1 ELSE 0 END AS whole_match
        FROM files
        WHERE {partial_clause}
        ORDER BY whole_match DESC, orig_index ASC;
    """
    # Placeholders are consumed in statement order: SELECT clause, then WHERE.
    df = pd.read_sql(sql, conn, params=whole_params + partial_params)

    if df.empty:
        return "<p>No matches found</p>"

    rows = []
    for row in df.head(250).itertuples(index=False):  # limit 250 results
        # Escape DB values before embedding them in markup.
        filename = html.escape(str(row.FILENAME))
        url = html.escape(str(row.PARSED_URL), quote=True)
        model_id = html.escape(str(row.MODEL_ID), quote=True)
        rows.append(
            "<tr>"
            f"<td>{filename}</td>"
            f"<td><a href='{url}' target='_blank'>{url}</a></td>"
            f"<td><a href='https://huggingface.co/{model_id}' target='_blank'>{model_id}</a></td>"
            "</tr>"
        )

    return (
        "<table>"
        "<thead><tr><th>Filename</th><th>File URL</th><th>Repo ID</th></tr></thead>"
        f"<tbody>{''.join(rows)}</tbody>"
        "</table>"
    )


# --- one-time setup at import: fetch the index and load it into SQLite ---
response = requests.get(database_url, stream=True, timeout=60)
response.raise_for_status()  # fail fast instead of writing an error page as CSV
with open(database_path, "wb") as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)

df = pd.read_csv(database_path)
df["FILENAME_NORM"] = df["FILENAME"].apply(normalize)
df["PARSED_URL"] = df["PARSED_URL"].apply(clean_file_url)
df = df.reset_index(drop=True)
df["orig_index"] = df.index  # preserves CSV order as the search tie-breaker

# In-memory DB; check_same_thread=False because Gradio invokes handlers
# from worker threads.
conn = sqlite3.connect(":memory:", check_same_thread=False)
df.to_sql("files", conn, index=False, if_exists="replace")

with gr.Blocks() as demo:
    gr.Markdown("## 🔍 RVC Voice Finder")
    query_input = gr.Textbox(label="Search here", placeholder="Hatsune Miku")
    button_query = gr.Button("Search")
    output = gr.HTML(label="Search Results")
    gr.Markdown(description)

    query_input.submit(search_files, inputs=query_input, outputs=output)
    button_query.click(search_files, inputs=query_input, outputs=output)

if __name__ == "__main__":
    demo.launch(debug=True, show_error=True)