Spaces:
No application file
No application file
import argparse
import atexit
import csv
import os

import numpy as np
import pandas as pd
import requests
# your settings
#
# Command-line options:
#   --csv_file          path of the input CSV read by main()
#   --output_file       path of the CSV that receives the [id, lyric] rows
#   --token             name of the input column holding the song ids
#   --lyric_batch_size  number of ids sent to the lyric service per request
parser = argparse.ArgumentParser()
parser.add_argument('--csv_file', type=str, default=None)
parser.add_argument('--output_file', type=str, default=None)
parser.add_argument('--token', type=str, default='songid')
parser.add_argument('--lyric_batch_size', type=int, default=100)
args = parser.parse_args()

# Both paths default to None. Stop with a usage message here, before the
# output file is created or truncated, instead of failing later with an
# opaque TypeError from open() / read_csv().
if args.csv_file is None or args.output_file is None:
    parser.error('--csv_file and --output_file are required')

csv_path = args.csv_file
output_file = args.output_file

# The csv module requires newline='' so that fields containing line breaks
# are quoted correctly and no extra '\r' is emitted on Windows. The encoding
# is pinned so non-ASCII text does not depend on the platform default.
output_handle = open(output_file, 'w', newline='', encoding='utf-8')
# Keep a named handle and close it at interpreter exit so buffered rows are
# always flushed, even if main() stops early with an exception.
atexit.register(output_handle.close)
writer = csv.writer(output_handle)

sep = ','
batch_size = args.lyric_batch_size
def main():
    """Fetch lyrics for the song ids in the input CSV and write them out.

    Reads the column named by ``args.token`` from the CSV at ``csv_path``,
    posts the ids to the ``lyric_pull`` endpoint in batches of ``batch_size``
    and writes one ``[id, lyric]`` row through the module-level ``writer``
    for every result whose lyric is not the empty string.

    Raises:
        ValueError: if ``batch_size`` is not a positive integer.
        requests.HTTPError: if the endpoint answers with an error status.
    """
    if batch_size < 1:
        raise ValueError('lyric_batch_size must be a positive integer')

    df = pd.read_csv(csv_path)
    songids = df[args.token].astype(str).tolist()
    print('total {} samples to extract ...'.format(len(songids)))

    n = 0
    # Step over the ids directly rather than pre-computing a batch count:
    # ``int(len / batch_size) + 1`` produced one extra request with an empty
    # id list whenever the total was an exact multiple of the batch size
    # (including an empty input).
    for start in range(0, len(songids), batch_size):
        sub_songids = songids[start:start + batch_size]
        resq_params = {'id': ', '.join(sub_songids), 'clean': 'deep'}
        resq = requests.post('http://11.181.92.137:8080/lyric_pull', json=resq_params)
        # Surface HTTP failures as such instead of a confusing JSON/KeyError.
        resq.raise_for_status()
        # NOTE(review): assumes the response is a JSON list of objects with
        # 'id' and 'lyric' keys - confirm against the service.
        results = resq.json()
        for result in results:
            # Results with an empty lyric are counted but not written.
            if result['lyric'] != '':
                writer.writerow([result['id'], result['lyric']])
            n += 1
        print('finish {} samples ...'.format(n))


if __name__ == '__main__':
    main()