# Repository page residue (not part of the script), kept for provenance:
#   kevinwang676's picture
#   Upload folder using huggingface_hub
#   6755a2d verified
import argparse
import atexit
import csv
import os

import numpy as np
import pandas as pd
import requests
# Command-line settings. The module-level names defined here (args, csv_path,
# output_file, writer, sep, batch_size) are read by main() below.
parser = argparse.ArgumentParser()
parser.add_argument('--csv_file', type=str, default=None)
parser.add_argument('--output_file', type=str, default=None)
parser.add_argument('--token', type=str, default='songid')
parser.add_argument('--lyric_batch_size', type=int, default=100)
args = parser.parse_args()
csv_path = args.csv_file
output_file = args.output_file
# Both paths default to None; report a usage error instead of letting
# open(None) fail with an opaque TypeError.
if csv_path is None or output_file is None:
    parser.error('--csv_file and --output_file are required')
# The batch size is used as a slice width, so it must be at least 1.
if args.lyric_batch_size < 1:
    parser.error('--lyric_batch_size must be a positive integer')
# newline='' is what the csv module expects of the file object it writes to
# (otherwise rows can gain an extra blank line on some platforms); utf-8 keeps
# non-ASCII lyrics writable regardless of the system locale.
output_handle = open(output_file, 'w', newline='', encoding='utf-8')
# Make sure buffered rows are flushed and the handle is closed on exit.
atexit.register(output_handle.close)
writer = csv.writer(output_handle)
sep = ','
batch_size = args.lyric_batch_size
def main():
    """Fetch lyrics for every ID in the input CSV and write them out as CSV.

    Reads the column named by ``args.token`` from ``csv_path``, posts the IDs
    to the lyric service in batches of ``batch_size``, and writes one
    ``[id, lyric]`` row through ``writer`` for each returned result whose
    lyric is not the empty string. A running count is printed after every
    batch.

    Raises:
        requests.HTTPError: if the lyric service responds with an error
            status code.
    """
    df = pd.read_csv(csv_path)
    songids = df[args.token].astype(str).tolist()
    print('total {} samples to extract ...'.format(len(songids)))
    n = 0
    # Step through the list by batch_size so that no request with an empty ID
    # list is sent when the total is an exact multiple of batch_size (or 0).
    for start in range(0, len(songids), batch_size):
        sub_songids = songids[start:start + batch_size]
        resq_params = {'id': ', '.join(sub_songids), 'clean': 'deep'}
        resq = requests.post('http://11.181.92.137:8080/lyric_pull', json=resq_params)
        # Fail loudly on an HTTP error instead of trying to parse its body.
        resq.raise_for_status()
        results = resq.json()
        for result in results:
            # Results with an empty lyric are skipped, not written.
            if result['lyric'] != '':
                writer.writerow([result['id'], result['lyric']])
            # NOTE(review): the original indentation was lost; counting every
            # returned result (with or without lyric) and reporting once per
            # batch is the assumed intent -- confirm.
            n += 1
        print('finish {} samples ...'.format(n))
# Call main() only when this file is executed as a script.
if __name__ == '__main__':
    main()