# Hugging Face Space (status: Running)
import ast
import html
import re
import sqlite3
import unicodedata

import gradio as gr
import pandas as pd
import requests
# Remote CSV holding the index of zip files, and the local path it is saved to.
database_url = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
database_path = "database.csv"

# Blurb rendered as Markdown in the UI.
description = (
    "This app digs through Hugging Face’s public zip files hunting for RVC models… "
    "and occasionally brings back random stuff that has nothing to do with them. "
    "Don’t worry though—the best RVC matches are always shown first, "
    "because we like to pretend we’re organized."
)
def clean_file_url(val):
    """Flatten a raw URL cell into a plain, comma-separated string.

    Args:
        val: A cell value. May be missing (NaN/None), a real list, a string
            holding a Python-literal list such as ``'["a", "b"]'``, or any
            other scalar.

    Returns:
        ``""`` for missing values; the items joined with ``", "`` for lists
        (real or string-encoded); the original string when it looks like a
        list but cannot be parsed; ``str(val)`` otherwise.
    """
    # Lists are checked before the missing-value test: ``pd.isna`` on a list
    # returns an element-wise array, whose truth value is ambiguous and
    # raises ValueError inside an ``if``.
    if isinstance(val, list):
        return ", ".join(map(str, val))

    if pd.isna(val):
        return ""

    if isinstance(val, str):
        stripped = val.strip()
        if stripped.startswith("[") and stripped.endswith("]"):
            try:
                parsed = ast.literal_eval(stripped)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                # Not a valid literal: leave the cell untouched.
                return val
            if isinstance(parsed, list):
                return ", ".join(map(str, parsed))

    return str(val)
# Characters treated as word separators when normalizing names.
_SEPARATOR_RE = re.compile(r"[+()\-_/.]")


def normalize(text: str) -> str:
    """Return a lowercase, accent-free, separator-free form of *text*.

    Args:
        text: The value to normalize. Missing values (NaN/None) are accepted,
            and non-string scalars are converted with ``str()`` so a numeric
            cell does not crash on ``.lower()``.

    Returns:
        ``""`` for missing values; otherwise the lowercased text with
        combining accent marks removed and each of ``+ ( ) - _ / .`` replaced
        by a single space.
    """
    if pd.isna(text):
        return ""

    lowered = str(text).lower()

    # NFD splits accented characters into base + combining mark (category
    # "Mn"); dropping the marks leaves the unaccented base characters.
    decomposed = unicodedata.normalize("NFD", lowered)
    unaccented = "".join(
        ch for ch in decomposed if unicodedata.category(ch) != "Mn"
    )

    return _SEPARATOR_RE.sub(" ", unaccented)
# Maximum number of rows rendered in the result table.
_MAX_RESULTS = 250

# SQL fragments; every value is bound through a "?" placeholder.
_WHOLE_WORD_CLAUSE = (
    "(FILENAME_NORM LIKE ? ESCAPE '\\'"
    " OR FILENAME_NORM LIKE ? ESCAPE '\\'"
    " OR FILENAME_NORM LIKE ? ESCAPE '\\'"
    " OR FILENAME_NORM = ?)"
)
_PARTIAL_CLAUSE = "FILENAME_NORM LIKE ? ESCAPE '\\'"


def _escape_like(keyword):
    """Escape LIKE wildcards so *keyword* is matched literally."""
    return (
        keyword.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )


def _cell(value):
    """Return *value* as HTML-escaped text ('' for missing values)."""
    return "" if pd.isna(value) else html.escape(str(value))


def _render_row(index, filename, url, model_id):
    """Render one result as a table row with a copy-to-clipboard URL box."""
    return f"""
    <tr>
        <td>{_cell(filename)}</td>
        <td>
            <input type="text" value="{_cell(url)}" id="copytext{index}" readonly
                style="width:300px; padding:4px; border-radius:6px; border:1px solid #666;
                background-color:var(--block-background-fill);
                color:var(--body-text-color);" />
            <button style="margin-left:5px; padding:4px 8px; border-radius:6px;
                background-color:var(--button-primary-background-fill);
                color:var(--button-primary-text-color);
                border:none; cursor:pointer;"
                onclick="navigator.clipboard.writeText(document.getElementById('copytext{index}').value)">
                Copy
            </button>
        </td>
        <td>{_cell(model_id)}</td>
    </tr>
    """


def search_files(query: str):
    """Search the ``files`` table and return the matches as an HTML table.

    Every keyword of the normalized query must appear in ``FILENAME_NORM``.
    Rows where every keyword matches as a whole space-delimited word are
    listed first, then rows follow their ``orig_index`` order. At most
    ``_MAX_RESULTS`` rows are returned.

    Args:
        query: Free-text search string typed by the user.

    Returns:
        An HTML string: the result table, ``<p>No matches found</p>`` when
        nothing matches, or ``<p>Empty query</p>`` when the query holds no
        searchable keyword.
    """
    if not query or not query.strip():
        return "<p>Empty query</p>"

    keywords = normalize(query).split()
    if not keywords:
        # The query consisted only of separator characters (e.g. "---");
        # building SQL from zero keywords would produce an invalid statement.
        return "<p>Empty query</p>"

    whole_params = []
    partial_params = []
    for keyword in keywords:
        pattern = _escape_like(keyword)
        # Whole word: in the middle, at the start, at the end, or the full name.
        whole_params += [f"% {pattern} %", f"{pattern} %", f"% {pattern}", keyword]
        partial_params.append(f"%{pattern}%")

    whole_conditions = " AND ".join([_WHOLE_WORD_CLAUSE] * len(keywords))
    partial_conditions = " AND ".join([_PARTIAL_CLAUSE] * len(keywords))

    sql = f"""
        SELECT *,
            CASE WHEN {whole_conditions} THEN 1 ELSE 0 END AS whole_match
        FROM files
        WHERE {partial_conditions}
        ORDER BY whole_match DESC, orig_index ASC
        LIMIT {_MAX_RESULTS};
    """
    # Placeholders are bound in textual order: the CASE expression comes
    # before the WHERE clause in the statement.
    matches = pd.read_sql(sql, conn, params=whole_params + partial_params)

    if matches.empty:
        return "<p>No matches found</p>"

    rows = [
        _render_row(i, row.FILENAME, row.PARSED_URL, row.MODEL_ID)
        for i, row in enumerate(matches.itertuples(index=False))
    ]

    return f"""
    <table border=1 style="border-collapse:collapse; width:100%; text-align:left;">
        <thead>
            <tr>
                <th style="padding:6px;">Filename</th>
                <th style="padding:6px;">File URL</th>
                <th style="padding:6px;">Repo ID</th>
            </tr>
        </thead>
        <tbody>
            {''.join(rows)}
        </tbody>
    </table>
    """
# Download the CSV index to disk. The timeout keeps startup from hanging
# forever on a stalled connection, and raise_for_status() stops an HTTP error
# page from being saved and parsed as if it were the database.
with requests.get(database_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(database_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

# Load the index and add the derived columns used by the search.
df = pd.read_csv(database_path)
df["FILENAME_NORM"] = df["FILENAME"].apply(normalize)
df["PARSED_URL"] = df["PARSED_URL"].apply(clean_file_url)
df = df.reset_index(drop=True)
# Remember the CSV row order so results can be sorted by it.
df["orig_index"] = df.index

# Connect to SQLite: an in-memory database holding the "files" table.
# check_same_thread=False lets the connection be used from threads other
# than the one that created it.
conn = sqlite3.connect(":memory:", check_same_thread=False)
df.to_sql("files", conn, index=False, if_exists="replace")
# Build the UI: title, search box, button, results area, then the blurb.
with gr.Blocks() as demo:
    gr.Markdown("## 🔍 RVC Voice Finder")
    query_input = gr.Textbox(label="Search here", placeholder="Hatsune Miku")
    button_query = gr.Button("Search")
    output = gr.HTML(label="Search Results")
    gr.Markdown(description)

    # Submitting the textbox and clicking the button run the same search.
    for bind_event in (query_input.submit, button_query.click):
        bind_event(search_files, inputs=query_input, outputs=output)

if __name__ == "__main__":
    demo.launch(debug=True, show_error=True)