Git_commit_classification / data /data_converter.py
ManojAlexender's picture
Upload folder using huggingface_hub
1007d88 verified
raw
history blame contribute delete
722 Bytes
import json
# Input and output file paths
input_filepath = 'commit_data_hpc.jsonl'
output_filepath = 'commit_data_hpc_modified.jsonl'


def convert_commit_records(input_path, output_path, start_idx=1):
    """Rewrite a JSONL file, renaming 'commit_message' to 'func' and adding 'idx'.

    Each non-blank line of ``input_path`` is parsed as one JSON object. Its
    'commit_message' value is moved to a new 'func' key, a sequential 'idx'
    key is added, and the object is written as one line to ``output_path``.

    Args:
        input_path: Path of the JSONL file to read.
        output_path: Path of the JSONL file to create or overwrite.
        start_idx: Value of 'idx' assigned to the first record written.

    Returns:
        The number of records written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no 'commit_message' key.
    """
    written = 0
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(input_path, 'r', encoding='utf-8') as infile, \
            open(output_path, 'w', encoding='utf-8') as outfile:
        for raw_line in infile:
            payload = raw_line.strip()
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; skip them instead of letting json.loads('') fail.
            if not payload:
                continue
            data = json.loads(payload)
            # Rename 'commit_message' key to 'func'
            data['func'] = data.pop('commit_message')
            # Number records, not physical lines, so skipped blank lines
            # leave no gaps in the sequence.
            data['idx'] = start_idx + written
            outfile.write(json.dumps(data) + '\n')
            written += 1
    return written


if __name__ == '__main__':
    convert_commit_records(input_filepath, output_filepath)
    print("File modification complete!")