# Hugging Face Space (scrape artifact removed: "Spaces: Running on CPU Upgrade" page header)
import gspread | |
import pandas as pd | |
from google.oauth2 import service_account | |
# NOTE(security): a commented-out service-account credential block for the
# earlier "cru-ocr" project (private_key_id ee936d111292eb13521edf3d201eb85ca4391824)
# was removed from this spot. Its private key was committed to source control
# and must be considered compromised — revoke/rotate it in Google Cloud IAM.
# WARNING(security): a live service-account private key is hard-coded below.
# Anyone who can read this file can act as this service account (Sheets,
# Drive, and Document AI access). Move these credentials to an environment
# variable / secret manager and rotate (revoke + reissue) this key.
CREDENTIALS_INFO = {
    "type": "service_account",
    "project_id": "connect-card-scanner",
    "private_key_id": "54224814c69a155d8bd34128e83e373e0f1caa6f",
    "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDNCRYK1FfJAVaK\nMxznDAr+SFZfqfD6m3MiAfKHJk83cY9JerNOOhc2TwaP8FhZdRs/+2k0FE84Rms2\nUoa6XMSC7JhlobaRFVjtVxljim+ake19i6iGk1kI6/BUYwwfrd1Emw1Dy1eP0rC0\nRoX8gfurF8HR4k4SuFnsSB62f7ttOiP6e0PFxDJxuRG68ua6gRRjWRZf49KEMz5T\nVnWRNzwnUAfd7XuK1MdXnKWG4KWUtLbYa5bv5n47mV3JLc8nwbDZ0redZNeSYf5L\nj0CD/swoNBm8YLzhPoNGPt/2c77Y9Gj5d9Fo3Pg5VuMQXgzpsyCnFPv/sEJi/JXZ\nHkH9JXnzAgMBAAECggEAYcH2R32MyWKg702Fd0fPqgO1YvE021foogtJplSGqomz\nZrZg7WhXDubI2zId/bEPKAdn1pPkXZF9pq3cXNjEPSQvWS2sTSpfdvHzQfmMUqdH\nE3fWBywT5GQR9zouWqBcAkznGy7FdeZfp+SEF6ul4aJ3H+oFjXlmLnkIY70tENsw\nOBs6Q+ffipJXhv5AQ0fGJWLaJ9cywRgyXODfX3mg38M2M53wx25CyN9kSN1ua1W3\nKhziHPCw2zqtRyTUWiA1vPYpN9mCQgz5TLXIRv+6nuwowlIVI/zvpUt9pWZtec2x\n4LYp63XIlYvl37qckh/yuj8DTkRem+ks3eApEVas2QKBgQDm6RZ5bxpr0qT8GdvY\nm8WvwQwHnnwco+WfJEsjlKkDg3Nw6y1aDtS7+DmMwSI5U2Md2tX4y+zqBYHOdVNf\nVjvSEokQKTd3qEeTiJTDGZ49OsaPhnaouqYPyFk1p/loAh0+31rMlAE6Kxi3USwK\n2EpNi0gMApLnAMsoqgD7o/lubQKBgQDjUEUKUJ2jgOOVFwK6DKy/xOx6BJl12vhY\n+JGGNFtKwmMl9GmYaGcXbmlF2NfzAxO4uxdl11U/7LBAvFGQZLGMDfOJy7xcy+95\nzEjpTuHTxB8lTed2ILIyJOwfJRAQO11VB2R6uHbwIJMJWTDwyIv5EG5a+/ZPqErV\nixg0NJXN3wKBgGG874JfALP66VK8L040QSzvbYQcFTSaOyttVVCuMAwIq+hz4zJn\nbKxTmSh252GUZjPQ2RkCWDmGMzeMecm02oVEyzdH+u5vEDzmZvFd+pi4NCu0Iq2w\ns3Giv//yJaNcobxnFivZydsxOrj9ZsMAYhMIjWpn/H5C27tOmjPpaD7RAoGARrjk\nog7u3L3vEKW2HXhwDsIP6O6haD+WYOgFLsH/XUUZX+epKtfgqzOY4ThUB7F/Y0wi\nPXc/eMIFHD77CXeqna6BhO+0TRLOERDz5lK6hA5SumKAjwohJuTB6fa4BrTRlvDT\n3DKkHpWj6ZasWV2r3vOzwe7+dU4g6kt6XlO1//UCgYEAgcp1IdttP9Yj/P+ZQ3Ld\nd2Ujwu9EVtc0bVOtfpLXWi27Zva2+M5oUof1vLpHhSbnCnmz2E4D4JzxHRsMxsZG\n/LIaLjAGMpPYxgK5CSo5FU5KC9ZL3nNjE/2JXq/Cx9Ua7q0S3vvJ8HUxncrFaA/d\nfOeHHOIZTbRFwo0zCggO16g=\n-----END PRIVATE KEY-----\n",
    "client_email": "connect-card-scanner@connect-card-scanner.iam.gserviceaccount.com",
    "client_id": "114871390457241513072",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/connect-card-scanner%40connect-card-scanner.iam.gserviceaccount.com",
    "universe_domain": "googleapis.com",
}
# Single credential object shared by Document AI (cloud-platform scope) and
# the gspread Sheets/Drive calls below.
CREDENTIALS = service_account.Credentials.from_service_account_info(
    CREDENTIALS_INFO,
    scopes=[
        "https://www.googleapis.com/auth/cloud-platform",
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ],
)
# Document AI processor coordinates ("us" region of the connect-card-scanner
# project). The processor itself is not invoked in this chunk of the file.
PROJECT_ID = "connect-card-scanner"
LOCATION = "us"
PROCESSOR_ID = "30861bab1d979b83"
# 1kzkUCcgvuS5AQ04fnivru93-G1RZIZnjxwLrRZUMccM
# (presumably a Google Sheet / Drive file ID — confirm before relying on it)
# Raw field names extracted from a scanned connect card (the OCR output
# schema). Free-text fields (Name/Phone/Email/Res Hall/Room #) hold strings;
# the remaining entries are checkbox fields holding truthy/falsy marks.
# convert_df_to_cleaned_format() collapses these into SHEET_COLUMNS.
ALL_FIELDS_COMBINED = [
    "Name",
    "Phone",
    "Email",
    "Cadet",
    "Greek or Going Greek",
    "Transfer Student",
    "Military Veteran",
    "International Student",
    "Res Hall",
    "Room #",
    "Off Campus",
    "Fr",
    "So",
    "Jr",
    "Sr",
    "Grad Student",
    "Male",
    "Female",
    "Non Binary",
    "Spiritual Survey Yes",
    "Spiritual Survey No",
    "Spiritual Survey Maybe",
    "Social Event Yes",
    "Social Event No",
    "Social Event Maybe",
    "Small Group Yes",
    "Small Group No",
    "Small Group Maybe",
]
# Output schema: the exact column order expected by the Google Sheet.
SHEET_COLUMNS = [
    "Timestamp",
    "Name",
    "Gender",
    "Year",
    "Phone Number",
    "Email",
    "Do any of these describe you?",
    "Do you live...",  # Off campus / On campus
    "Which Res Hall are you in?",
    "What is your room number?",
    "Giving your opinion in a campus wide spiritual survey",
    "Social Events with Cru",
    "A small group Bible Study",
    "Getting our Cru weekly email",  # if three 'yeses', then add this
]

# Year checkbox -> sheet label. Dict order doubles as checkbox precedence.
_YEAR_MAP = {
    "Fr": "Freshman",
    "So": "Sophomore",
    "Jr": "Junior",
    "Sr": "Senior",
    "Grad Student": "Graduate Student",
}

# Normalized (lowercased, stripped, de-punctuated) Res Hall spellings ->
# canonical hall names. Hoisted to module level so the dict is built once,
# not on every call.
_RES_HALL_MAP = {
    "cochrange": "Cochrane",  # kept for compatibility with the old typo'd key
    "cochrane": "Cochrane",
    "cid": "Creativity & Innovation District",
    "creativity and innovation district": "Creativity & Innovation District",
    "creativity & innovation district": "Creativity & Innovation District",
    "east aj": "EAJ",
    "eaj": "EAJ",
    "east campbell": "East Campbell",
    "east egg": "East Eggleston",
    "east eggleston": "East Eggleston",
    "donaldson brown": "GLC",
    "graduate life center": "GLC",
    "graduate life center at donaldson brown": "GLC",
    "glc": "GLC",
    "harper": "Harper",
    "hoge": "Hoge",
    "hillcrest": "Hillcrest",
    "johnson": "Johnson",
    "johnson hall": "Johnson",
    "main campbell": "Main Campbell",
    "main egg": "Main Eggleston",
    "main eggleston": "Main Eggleston",
    "miles": "Miles",
    "new hall": "New Hall West",
    "new hall west": "New Hall West",
    "nhw": "New Hall West",
    "new res": "New Res East",
    "new res east": "New Res East",
    "nre": "New Res East",
    "shag": "OShag",
    "oshag": "OShag",
    "oshaughnessy": "OShag",
    "payne": "Payne",
    "pearson - east": "Pearson - East",
    "pearson-east": "Pearson - East",
    "pe": "Pearson - East",
    "ep": "Pearson - East",
    "phe": "Pearson - East",
    "pearson - west": "Pearson - West",
    "pearson-west": "Pearson - West",
    "pw": "Pearson - West",
    "wp": "Pearson - West",
    "phw": "Pearson - West",
    "py": "PY",
    "p-y": "PY",
    "peddrew-yates": "PY",
    "peddrew yates": "PY",
    "p.y.": "PY",
    "slusher tower": "Slusher Tower",
    "slusher": "Slusher Tower",
    "slusher wing": "Slusher Wing",
    "upper quad north": "Upper Quad North",
    "uqhn": "Upper Quad North",
    "uqn": "Upper Quad North",
    "vawter": "Vawter",
    "west aj": "WAJ",
    "waj": "WAJ",
    "west egg": "West Eggleston",
    "west eggleston": "West Eggleston",
    "whitehurst": "Whitehurst",
}


def _first_truthy(row, fields):
    """Return the first field name in *fields* whose cell is truthy, else ""."""
    return next((field for field in fields if row[field]), "")


def _collapse_yes_no_maybe(df, target, yes_col, no_col, maybe_col):
    """Collapse three checkbox columns into one "Yes"/"No"/"Maybe" column.

    Precedence is Yes > No > Maybe (matches the original nested ternaries);
    an empty string is used when no box is checked. Drops the three source
    columns in place.
    """
    df[target] = df.apply(
        lambda row: "Yes"
        if row[yes_col]
        else ("No" if row[no_col] else ("Maybe" if row[maybe_col] else "")),
        axis=1,
    )
    df.drop(columns=[yes_col, no_col, maybe_col], inplace=True)


def convert_df_to_cleaned_format(df):
    """Convert an OCR-extracted connect-card DataFrame to the sheet format.

    Expects *df* to contain the ALL_FIELDS_COMBINED columns (free-text fields
    as strings, checkbox fields truthy/falsy) and returns a DataFrame whose
    columns are exactly SHEET_COLUMNS, ready to append to the Google Sheet.
    Mutates *df* in place and returns it reindexed.
    """
    # Year: first checked class-year box, mapped to its full label.
    # .fillna("") keeps "no box checked" as "" instead of NaN.
    df["Year"] = (
        df.apply(lambda row: _first_truthy(row, list(_YEAR_MAP)), axis=1)
        .map(_YEAR_MAP)
        .fillna("")
    )
    df.drop(columns=list(_YEAR_MAP), inplace=True)

    # Scan timestamp (local time, second resolution).
    df["Timestamp"] = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")

    # Gender: first checked box wins.
    gender_cols = ["Male", "Female", "Non Binary"]
    df["Gender"] = df.apply(lambda row: _first_truthy(row, gender_cols), axis=1)
    df.drop(columns=gender_cols, inplace=True)

    # Interest questions: three checkbox triplets -> one answer column each.
    _collapse_yes_no_maybe(
        df,
        "A small group Bible Study",
        "Small Group Yes",
        "Small Group No",
        "Small Group Maybe",
    )
    _collapse_yes_no_maybe(
        df,
        "Social Events with Cru",
        "Social Event Yes",
        "Social Event No",
        "Social Event Maybe",
    )
    _collapse_yes_no_maybe(
        df,
        "Giving your opinion in a campus wide spiritual survey",
        "Spiritual Survey Yes",
        "Spiritual Survey No",
        "Spiritual Survey Maybe",
    )

    # "Describe you" checkboxes -> comma-separated list of checked labels.
    describe_cols = [
        "Cadet",
        "Greek or Going Greek",
        "Transfer Student",
        "Military Veteran",
        "International Student",
    ]
    df["Do any of these describe you?"] = df.apply(
        lambda row: ", ".join(field for field in describe_cols if row[field]),
        axis=1,
    )
    df.drop(columns=describe_cols, inplace=True)

    # Res Hall: normalize the free-text spelling, then map to the canonical
    # name; unrecognized halls fall back to the raw OCR text.
    # BUG FIX: the original chained Series.replace(".", "") / replace("\n", " "),
    # which only replaces *whole cell values* equal to "." or "\n" — periods and
    # newlines inside names were never stripped. str.replace(..., regex=False)
    # does the intended substring removal.
    normalized_hall = (
        df["Res Hall"]
        .str.lower()
        .str.strip()
        .str.replace("'", "", regex=False)
        .str.replace(".", "", regex=False)
        .str.replace("\n", " ", regex=False)
    )
    df["Which Res Hall are you in?"] = normalized_hall.map(_RES_HALL_MAP).fillna(
        df["Res Hall"]
    )
    df.drop(columns=["Res Hall"], inplace=True)

    # Weekly email opt-in: only when all three interest answers are "Yes".
    interest_cols = [
        "A small group Bible Study",
        "Social Events with Cru",
        "Giving your opinion in a campus wide spiritual survey",
    ]
    df["Getting our Cru weekly email"] = df.apply(
        lambda row: "Yes" if all(row[c] == "Yes" for c in interest_cols) else "No",
        axis=1,
    )

    # BUG FIX: df["Phone"][:10] sliced the first 10 *rows*, not the first 10
    # characters, so numbers were never truncated. .str[:10] truncates each
    # value (assumes Phone holds strings from OCR — missing values become NaN
    # and are blanked by the final replace below).
    df["Phone Number"] = df["Phone"].str[:10]  # keep only first 10 digits
    df.drop(columns=["Phone"], inplace=True)

    # Housing: the "Off Campus" checkbox decides the answer.
    df["Do you live..."] = df.apply(
        lambda row: "Off Campus" if row["Off Campus"] else "On Campus", axis=1
    )
    df.drop(columns=["Off Campus"], inplace=True)

    df["What is your room number?"] = df["Room #"]
    df.drop(columns=["Room #"], inplace=True)

    # Blank out unchecked-box glyphs and missing values, then order columns
    # exactly as the sheet expects.
    df = df.replace({"☐": "", None: ""})
    df = df[SHEET_COLUMNS]
    return df
def upload_to_google_sheets(df):
    """Uploads the edited DataFrame to a Google Sheet by appending to existing data.

    Cleans *df* via convert_df_to_cleaned_format, then appends it to the
    "AI Scanning Hold" spreadsheet (worksheet "2025-2026"), creating the
    spreadsheet/worksheet if missing. Returns a human-readable status string.

    PERF FIX: the original called worksheet.append_row once per DataFrame row
    (one Sheets API request per row — slow and quota-hungry); append_rows
    sends each batch in a single request.
    """
    df = convert_df_to_cleaned_format(df)
    spreadsheet_name = "AI Scanning Hold"
    worksheet_name = "2025-2026"
    # Authenticate with Google Sheets
    gc = gspread.authorize(CREDENTIALS)
    # Open the Google Sheet, creating it on first use.
    try:
        spreadsheet = gc.open(spreadsheet_name)
    except gspread.SpreadsheetNotFound:
        spreadsheet = gc.create(spreadsheet_name)
    # Select the worksheet, creating it on first use.
    try:
        worksheet = spreadsheet.worksheet(worksheet_name)
    except gspread.WorksheetNotFound:
        worksheet = spreadsheet.add_worksheet(
            title=worksheet_name, rows="100", cols="20"
        )
    existing_data = worksheet.get_all_values()
    df_headers = df.columns.values.tolist()

    if not existing_data:
        # Worksheet is empty: write headers plus all data rows in one call.
        worksheet.update([df_headers] + df.values.tolist())
        return f"Data uploaded successfully to {spreadsheet_name} - {worksheet_name} (new sheet with headers). Added {len(df)} rows."

    existing_headers = existing_data[0]
    if existing_headers == df_headers:
        # Headers match exactly: append all rows as one batched request.
        worksheet.append_rows(df.values.tolist())
        return f"Data appended successfully to {spreadsheet_name} - {worksheet_name}. Added {len(df)} rows."

    if len(existing_headers) == len(df_headers) and all(
        h.strip() for h in existing_headers
    ):
        # Same column count but different header text: try to map our columns
        # onto the existing ones by name (case-insensitive, whitespace-trimmed).
        header_mapping = {}
        for i, df_header in enumerate(df_headers):
            for j, existing_header in enumerate(existing_headers):
                if df_header.strip().lower() == existing_header.strip().lower():
                    header_mapping[i] = j
                    break
        if len(header_mapping) == len(df_headers):
            # Every column mapped: reorder each row to the sheet's order.
            reordered_rows = []
            for _, row in df.iterrows():
                reordered_row = [""] * len(existing_headers)
                for df_col_idx, existing_col_idx in header_mapping.items():
                    reordered_row[existing_col_idx] = row.iloc[df_col_idx]
                reordered_rows.append(reordered_row)
            worksheet.append_rows(reordered_rows)
            return f"Data appended successfully to {spreadsheet_name} - {worksheet_name}. Added {len(df)} rows (columns reordered to match existing headers)."
        # Cannot map every column: append as-is and warn in the status.
        worksheet.append_rows(df.values.tolist())
        return f"Data appended to {spreadsheet_name} - {worksheet_name}. Added {len(df)} rows (WARNING: Column headers don't match existing headers)."

    # No usable headers (blanks, or a different column count): treat the sheet
    # as empty and rewrite it with our headers. NOTE(review): this clears any
    # pre-existing rows — intentional in the original; confirm before changing.
    worksheet.clear()
    worksheet.update([df_headers] + df.values.tolist())
    return f"Data uploaded successfully to {spreadsheet_name} - {worksheet_name} (replaced existing data with headers). Added {len(df)} rows."