# NOTE: the lines originally here were file-viewer page residue, not part of
# the script: "Spaces: Running", "File size: 1,181 Bytes", commit ca5b08e,
# and a 1-36 line-number gutter.
import os
import json
import argparse
def _split_doc_and_page(file_name):
    """Split a ``<document>_<page>.<ext>`` file name into ``(document, page)``.

    Only the final extension is stripped and only the last underscore is
    treated as the separator, so document names that themselves contain
    dots or underscores (``my_doc_3.pdf``, ``1706.03762_3.pdf``) are handled.

    Raises:
        ValueError: if the name (without extension) contains no underscore.
    """
    stem = os.path.splitext(file_name)[0]
    doc_name, sep, page = stem.rpartition("_")
    if not sep:
        raise ValueError(
            f"expected '<document>_<page>' in file name, got {file_name!r}"
        )
    return doc_name, page


def main(argv=None):
    """Write one JSON file per record of ``<workspace>/data.jsonl``.

    Each non-blank line of ``data.jsonl`` is parsed as a JSON object with the
    keys ``pdf_name_1``, ``pdf_name_2``, ``md_elem_list_1`` and
    ``md_elem_list_2``. For every record a file
    ``<workspace>/jsons/<document>_<page_1>_<page_2>.json`` is written that
    holds ``{"page_1": ..., "page_2": ...}``, where each value is the
    corresponding element list joined with blank lines.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    Raises:
        ValueError: if a pdf name does not look like ``<document>_<page>``.
    """
    parser = argparse.ArgumentParser(description="Evaluate element_merge_detect task")
    parser.add_argument(
        "workspace",
        help="The filesystem path where work will be stored, can be a local folder",
    )
    args = parser.parse_args(argv)

    json_dir = os.path.join(args.workspace, "jsons")
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(json_dir, exist_ok=True)

    jsonl_file = os.path.join(args.workspace, "data.jsonl")
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(jsonl_file, "r", encoding="utf-8") as src:
        for line in src:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)

            _, page_1 = _split_doc_and_page(record["pdf_name_1"])
            # The document name of the second entry is the one used for the
            # output file name.
            pdf_name, page_2 = _split_doc_and_page(record["pdf_name_2"])

            json_name = os.path.join(
                json_dir, pdf_name + "_" + page_1 + "_" + page_2 + ".json"
            )
            pages = {
                "page_1": "\n\n".join(record["md_elem_list_1"]),
                "page_2": "\n\n".join(record["md_elem_list_2"]),
            }
            # Distinct handle name: do not rebind the input file being iterated.
            with open(json_name, "w", encoding="utf-8") as dst:
                json.dump(pages, dst)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()