Spaces:
Sleeping
Sleeping
File size: 3,008 Bytes
787f9a6 9678fdb 787f9a6 9678fdb 787f9a6 bf7e729 787f9a6 bf7e729 787f9a6 bf7e729 787f9a6 bf7e729 787f9a6 bf7e729 787f9a6 9678fdb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import csv
import os
from ..config import FIGHTERS_CSV_PATH
def convert_height_to_cm(height_str):
    """
    Turn a feet/inches height such as '5 ft 11' into whole centimeters.

    The text before the first ' ft' is read as feet and the text after it
    (up to any further ' ft') as inches; a missing inches part counts as 0.
    Falsy input, input without 'ft', and input whose parts are not integers
    are handed back unchanged.
    """
    if not (height_str and 'ft' in height_str):
        return height_str

    try:
        pieces = height_str.split(' ft')
        feet_text = pieces[0].strip()
        inch_text = pieces[1].strip()
        feet = int(feet_text)
        # An empty inches part (e.g. '6 ft') is treated as zero inches
        inches = int(inch_text or 0)
    except (ValueError, IndexError):
        # Unparseable input is passed through untouched
        return height_str

    return round((feet * 12 + inches) * 2.54)
def preprocess_fighters_csv(file_path=FIGHTERS_CSV_PATH):
    """
    Clean the fighters CSV and save the result back to the same path.

    Every row is read into memory. In the 'first_name' and 'last_name'
    columns each ' ft' is replaced by an apostrophe (e.g. "O ftMalley" ->
    "O'Malley"). If a 'height' column exists it is replaced, in the same
    position, by 'height_cm' with each value passed through
    convert_height_to_cm.

    The output is written to a sibling '<file>.tmp' first and then moved
    over the original with os.replace, so a failure while writing leaves
    the original file intact instead of truncated.

    Args:
        file_path: Path of the CSV file to rewrite. Defaults to
            FIGHTERS_CSV_PATH.

    Returns:
        None. Progress and problems are reported with print(); exceptions
        raised while processing are caught and printed, not propagated.
    """
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return

    try:
        # Read all data from the CSV into memory first
        with open(file_path, 'r', newline='', encoding='utf-8') as csv_file:
            reader = csv.DictReader(csv_file)
            # Return if there's no header or the file is empty
            if not reader.fieldnames:
                print(f"Warning: {file_path} is empty or has no headers.")
                return
            headers = list(reader.fieldnames)
            rows = list(reader)

        # --- Data Cleaning and Processing ---
        name_cleaned_count = 0
        for row in rows:
            # Clean fighter names (e.g., "O ftMalley" -> "O'Malley")
            for col in ('first_name', 'last_name'):
                value = row.get(col)
                # DictReader stores None for fields missing from a short
                # row, so guard before searching the value
                if value and ' ft' in value:
                    row[col] = value.replace(' ft', "'")
                    name_cleaned_count += 1

            # Convert height to cm and remove the old column
            if 'height' in row:
                row['height_cm'] = convert_height_to_cm(row.pop('height'))

        # Rename the header only when a 'height' column was really present,
        # and remember that so the summary below stays accurate on re-runs
        height_converted = 'height' in headers
        if height_converted:
            headers[headers.index('height')] = 'height_cm'

        # Resolve symlinks so the real file is replaced, not the link
        target_path = os.path.realpath(file_path)
        tmp_path = f"{target_path}.tmp"
        try:
            with open(tmp_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                writer.writerows(rows)
            # Swap the finished file into place in a single step
            os.replace(tmp_path, target_path)
        finally:
            # Only still present if writing or replacing failed
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        print(f"Successfully processed file: {file_path}")
        if name_cleaned_count > 0:
            print(f"Cleaned {name_cleaned_count} instances of ' ft' in fighter names.")
        if height_converted:
            print("Converted 'height' column to centimeters and renamed it to 'height_cm'.")

    except Exception as e:
        # Deliberate best-effort boundary: report the problem, do not raise
        print(f"An error occurred: {e}")