sawadogosalif's picture
Update app/global_vars.py
5eee2bb verified
raw
history blame contribute delete
463 Bytes
import pandas as pd
import os
from datasets import load_dataset, DownloadConfig
from helpers.utils import extract_audio_identifier
# Dataset identifier handed to `load_dataset` below.
DATA_FILE = "sawadogosalif/MooreFRCollections_BibleOnlyText"

# Load the "train" split and convert it to a pandas DataFrame. The value of
# the HF_TOKEN environment variable is passed as the download token; the
# lookup is strict, so a missing variable raises KeyError when this module is
# imported.
data = load_dataset(
    DATA_FILE,
    split="train",
    download_config=DownloadConfig(token=os.environ["HF_TOKEN"]),
).to_pandas()


def _identifier_as_series(source_url):
    """Return `extract_audio_identifier(source_url)` wrapped in a Series.

    Returning a Series from the function given to `Series.apply` makes pandas
    expand the result into one column per element.
    """
    return pd.Series(extract_audio_identifier(source_url))


# Fill the "chapter" and "page" columns from each row's "moore_source_url".
# NOTE(review): assumes extract_audio_identifier yields exactly two values,
# in (chapter, page) order — confirm against helpers.utils.
data[["chapter", "page"]] = data["moore_source_url"].apply(_identifier_as_series)

# NOTE(review): not used in this file; presumably the storage bucket name
# read by other modules — confirm against callers.
BUCKET_NAME = "moore-collection"