Spaces:
Sleeping
Sleeping
File size: 4,218 Bytes
52e5488 2cca7fc db0e40a 944093d 2cca7fc 944093d 2cca7fc 2b74719 1736fe6 1607f5b 1736fe6 944093d db0e40a 3cb873e 2b74719 8364b2d 2b74719 8364b2d 2b74719 8364b2d db0e40a 4f47c49 5054d5e 4f32ea6 cf0f262 e0b5c19 4f47c49 2cca7fc 2b74719 2cca7fc f7c1cff 2cca7fc 63dc99a 2cca7fc 8210da7 2cca7fc 49ae11d 2cca7fc 8210da7 7e82a0b 2cca7fc 3cb873e f7c1cff 2cca7fc 7e82a0b 2cca7fc 63dc99a 7e82a0b 2b74719 7e82a0b 52e5488 63dc99a 53d4231 63dc99a 2cca7fc 52e5488 2cca7fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import gradio as gr
import polars as pl
# Display name -> language code for every language offered in the UI.
favourite_langs = dict(English="en", Romanian="ro", German="de")
# Dropdown choices: the display names, in insertion order.
options = [*favourite_langs]
# Dataset configurations selectable in the UI; the name is lower-cased
# when the parquet URL is built in search_text().
models = ["ENRO", "DERO"]
# English, Romanian
def search_text(input_text, sselected_language, tselected_language, model_name, hits, toggle_case):
    """Search one language column of the dataset and return matching row pairs.

    The parquet export of the ``TiberiuCristianLeon/2RO`` dataset
    configuration named by ``model_name`` is read, the ``sselected_language``
    column is matched against ``input_text`` and at most ``hits`` matching
    rows are returned, restricted to the source and target columns.

    Args:
        input_text: Pattern to look for. It is handed to polars'
            ``str.contains`` as a regular expression, so regex syntax applies.
        sselected_language: Name of the column that is searched.
        tselected_language: Name of the column returned next to the source.
        model_name: Dataset configuration name; lower-cased to build the URL.
        hits: Maximum number of rows to return.
        toggle_case: When true the match is case sensitive; otherwise the
            pattern is prefixed with ``(?i)`` to make it case insensitive.

    Returns:
        A ``(table, message)`` tuple. ``table`` is a polars DataFrame holding
        the matching rows, or ``None`` when nothing was searched or the search
        failed. ``message`` is the status or error text shown in the UI.
    """
    if not input_text:
        # An empty pattern matches every row, so the "found" count would be
        # meaningless; ask for a query instead of downloading the dataset.
        return None, "Please enter some text to search"
    if sselected_language == tselected_language:
        # Selecting the same column twice is rejected by polars' select().
        return None, "Source and target language must be different"

    # The slider may deliver a float; DataFrame.head() needs an integer.
    limit = int(hits)
    path_to_model = f"https://huggingface.co/api/datasets/TiberiuCristianLeon/2RO/parquet/{model_name.lower()}/train/0.parquet"
    # (?i) switches the regex to case-insensitive matching.
    pattern = input_text if toggle_case else f"(?i){input_text}"

    try:
        df = pl.read_parquet(path_to_model)
        filtered = df.filter(pl.col(sselected_language).str.contains(pattern))
        print(toggle_case, filtered.head(limit))
        list_of_arrays = filtered.select([sselected_language, tselected_language]).head(limit)
    except Exception as err:
        # UI boundary: a bad regex, a language column missing from the chosen
        # dataset or a failed download must end up in the message field
        # rather than as an unhandled error in the event handler.
        print(f"search_text failed: {err!r}")
        return None, f"Error: {err}"

    message_text = f'Done! Found {len(list_of_arrays)} entries'
    return list_of_arrays, message_text
# Define a function to swap dropdown values
def swap_languages(src_lang, tgt_lang):
    """Return the two dropdown values in reversed order (target first)."""
    swapped = (tgt_lang, src_lang)
    return swapped
def create_interface():
    """Assemble the Gradio Blocks UI for searching the dataset.

    Creates the query textbox, the source/target language dropdowns with a
    swap button, the case-sensitivity checkbox, the dataset dropdown, the
    search button, the results table, the message field and the hit-count
    slider, then wires both the search button and the Enter key of the
    textbox to ``search_text``.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    with gr.Blocks() as interface:
        gr.Markdown("## Search Text in Dataset")

        with gr.Row():
            input_text = gr.Textbox(
                label="Enter text to search:",
                placeholder="Type your text here...",
                info="Press Enter key to start search",
            )

        # NOTE(review): the original nesting was not recoverable from the
        # source; this assumes the row holds the two language dropdowns and
        # the swap button only - confirm against the intended layout.
        with gr.Row():
            sselected_language = gr.Dropdown(
                choices=options, value=options[0], label="Source language", interactive=True
            )
            tselected_language = gr.Dropdown(
                choices=options, value=options[1], label="Target language", interactive=True
            )
            swap_button = gr.Button("Swap Languages")
            language_pair = [sselected_language, tselected_language]
            swap_button.click(fn=swap_languages, inputs=language_pair, outputs=language_pair)

        toggle_case = gr.Checkbox(
            info="Case sensitive search",
            label="Toggle case sensitive search",
            value=True,
            interactive=True,
            visible=True,
        )
        model_name = gr.Dropdown(
            choices=models, label="Select a dataset", value=models[0], interactive=True
        )
        search_button = gr.Button("Search")
        translated_text = gr.Dataframe(
            label="Returned entries:",
            interactive=False,
            headers=[options[0], options[1]],
            datatype=["str", "str"],
            col_count=(2, "fixed"),
            type="polars",
            wrap=True,
            show_row_numbers=False,
            show_copy_button=True,
        )
        message_text = gr.Textbox(
            label="Messages:",
            placeholder="Display field for status and error messages",
            interactive=False,
        )
        hits = gr.Slider(minimum=1, maximum=100, value=10, step=5, label="Number of returned hits")

        # Both triggers run the same search with the same inputs and outputs.
        search_inputs = [input_text, sselected_language, tselected_language, model_name, hits, toggle_case]
        search_outputs = [translated_text, message_text]
        search_button.click(search_text, inputs=search_inputs, outputs=search_outputs)
        # Pressing Enter in the query textbox submits the search as well.
        input_text.submit(search_text, inputs=search_inputs, outputs=search_outputs)

    return interface
if __name__ == "__main__":
    # Script entry point: build the UI, then start the Gradio server.
    interface = create_interface()
    interface.launch()
|