Spaces:
Running
Running
import os | |
import json | |
import argparse | |
def _split_page_name(file_name):
    """Split a per-page file name such as ``"doc_3.pdf"`` into ``("doc", "3")``.

    Everything from the first ``.`` onwards is discarded, then the remainder
    is split on its LAST underscore so that document names which themselves
    contain underscores (``"my_doc_3.pdf"``) are handled.

    Raises:
        ValueError: if the name contains no underscore before the first dot.
    """
    stem = file_name.split(".")[0]
    base, sep, page = stem.rpartition("_")
    if not sep:
        raise ValueError(
            "expected a name of the form '<pdf>_<page>', got {!r}".format(file_name)
        )
    return base, page


def main(argv=None):
    """Explode ``<workspace>/data.jsonl`` into one JSON file per record.

    Each non-blank line of ``data.jsonl`` is parsed as a JSON object with the
    keys ``pdf_name_1``, ``pdf_name_2``, ``md_elem_list_1`` and
    ``md_elem_list_2``.  For every record a file
    ``<workspace>/jsons/<pdf>_<page_1>_<page_2>.json`` is written containing
    ``{"page_1": ..., "page_2": ...}``, where each value is the corresponding
    element list joined with blank lines (``"\\n\\n"``).

    ``<page_1>`` is taken from ``pdf_name_1``; ``<pdf>`` and ``<page_2>`` are
    taken from ``pdf_name_2``.

    Args:
        argv: Optional list of command-line arguments (without the program
            name).  ``None`` (the default) makes argparse read ``sys.argv``.

    Raises:
        ValueError: if a pdf name has no ``_<page>`` suffix.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks one of the expected keys.
    """
    parser = argparse.ArgumentParser(description="Evaluate element_merge_detect task")
    parser.add_argument(
        "workspace",
        help="The filesystem path where work will be stored, can be a local folder",
    )
    args = parser.parse_args(argv)

    json_dir = os.path.join(args.workspace, "jsons")
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(json_dir, exist_ok=True)

    jsonl_file = os.path.join(args.workspace, "data.jsonl")
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(jsonl_file, "r", encoding="utf-8") as src:
        for line in src:
            # Tolerate blank lines (e.g. a trailing empty line) in the JSONL.
            if not line.strip():
                continue
            record = json.loads(line)

            _, page_1 = _split_page_name(record["pdf_name_1"])
            pdf_name, page_2 = _split_page_name(record["pdf_name_2"])
            json_name = os.path.join(
                json_dir, "{}_{}_{}.json".format(pdf_name, page_1, page_2)
            )

            pages = {
                "page_1": "\n\n".join(record["md_elem_list_1"]),
                "page_2": "\n\n".join(record["md_elem_list_2"]),
            }
            # json.dump escapes non-ASCII by default, so the output is
            # identical regardless of encoding; utf-8 is set for consistency.
            with open(json_name, "w", encoding="utf-8") as dst:
                json.dump(pages, dst)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()