"""Build workshop schedules from a table of instructor availability.

Each row of the input DataFrame names an instructor, how many workshops they
want to teach, the timeslots they are available for, and a description of
their workshop(s). The module enumerates possible assignments of instructors
to timeslots and reports the best ones as a DataFrame and a csv file.
"""
import copy
import os
import random
from collections import Counter

import gradio as gr
import pandas as pd

# CONSTANTS
NAME_COL = 'Juggler_Name'
NUM_WORKSHOPS_COL = 'Num_Workshops'
AVAIL_COL = 'Availability'
DESCRIP_COL = 'Workshop_Descriptions'
DELIMITER = ';'


class Schedule:
    """Timeslot assignments plus running totals kept in sync by add/remove."""

    def __init__(self, timeslots: dict):
        # Maps a timeslot name to the list of people teaching during it.
        # The dict is stored as-is (not copied), so add/remove mutate it.
        self.timeslots = timeslots
        # Number of timeslots that have at least one instructor
        self.num_timeslots_filled = sum(
            1 for instructors in timeslots.values() if len(instructors) > 0
        )
        # Total number of (person, timeslot) assignments
        self.total_num_workshops = sum(
            len(instructors) for instructors in timeslots.values()
        )

    def __repr__(self):
        return (
            f"{type(self).__name__}(timeslots={self.timeslots!r}, "
            f"num_timeslots_filled={self.num_timeslots_filled}, "
            f"total_num_workshops={self.total_num_workshops})"
        )

    def add(self, person: str, time: str):
        """Assign person to the timeslot named time.

        Raises KeyError for an unknown timeslot; the counters are only
        updated once the lookup has succeeded.
        """
        slot = self.timeslots[time]
        if len(slot) == 0:
            self.num_timeslots_filled += 1
        slot.append(person)
        self.total_num_workshops += 1

    def remove(self, person: str, time: str):
        """Remove one occurrence of person from the timeslot named time.

        Raises KeyError for an unknown timeslot and ValueError if person is
        not in it; in both cases the counters are left untouched.
        """
        slot = self.timeslots[time]
        slot.remove(person)
        if len(slot) == 0:
            self.num_timeslots_filled -= 1
        self.total_num_workshops -= 1


def can_teach(person: str, slot: list, capacity: int) -> bool:
    """Return True if the person can teach during the slot, and False otherwise."""
    if len(slot) >= capacity:
        return False
    # No one can teach two workshops at once
    return person not in slot


def convert_df(df):
    """Extract the relevant information from the availability DataFrame.

    Returns a tuple (people, availability):
      people       -- list of names; someone teaching several workshops
                      appears once per workshop
      availability -- dict mapping a name to the list of (stripped) timeslots
                      taken from AVAIL_COL, split on DELIMITER
    """
    people = []
    # Key: person's name
    # Value: a list of their availability
    availability = {}
    # Iterating the columns directly (instead of df.loc[0..n-1]) also works
    # when the DataFrame does not have the default 0..n-1 index.
    rows = zip(df[NAME_COL], df[NUM_WORKSHOPS_COL], df[AVAIL_COL])
    for name, number, curr_avail in rows:
        # TODO: make sure no people with the same name fill out the form
        # int() accepts counts that pandas stored as floats or numpy integers
        people.extend([name] * int(number))
        availability[name] = [elem.strip() for elem in curr_avail.split(DELIMITER)]
    return people, availability


def is_defined(curr):
    """Return False if curr is NaN, and True otherwise."""
    # NaN is the only value that compares unequal to itself
    return not (curr != curr)


def is_valid(curr):
    """Return True if curr is defined and its length is greater than 0."""
    return (is_defined(curr) and len(curr) > 0)


def initialize_timeslots(df) -> dict:
    """Make a dictionary with one empty list per distinct timeslot.

    The timeslots are collected from every entry of AVAIL_COL (split on
    DELIMITER and stripped), in order of first appearance.
    """
    to_return = {}
    for elem in df[AVAIL_COL]:
        for inner in elem.split(DELIMITER):
            to_return.setdefault(inner.strip(), [])
    return to_return


def find_all_schedules(people: list, availability: dict, schedule_obj: Schedule, capacity: int, schedules: list, max_list: list) -> None:
    """Recursively explore the possible schedules.

    people       -- names still to be placed (one entry per workshop)
    availability -- dict mapping a name to the timeslots they can teach
    schedule_obj -- the schedule under construction; mutated while exploring
                    and restored before returning
    capacity     -- maximum number of instructors per timeslot
    schedules    -- output list; receives a deep copy of schedule_obj whenever
                    its number of filled timeslots ties or beats max_list[0].
                    Copies taken before a better count was found stay in the
                    list.
    max_list     -- one-element list holding the best filled-timeslot count
                    seen so far (a list so the recursive calls can update it)
    """
    if schedule_obj.num_timeslots_filled >= max_list[0]:
        schedules.append(copy.deepcopy(schedule_obj))
        max_list[0] = schedule_obj.num_timeslots_filled

    # Base case
    if len(people) == 0:
        return

    # Recursive cases
    person = people[0]
    everyone_else = people[1:]
    for time in availability[person]:
        if can_teach(person, schedule_obj.timeslots[time], capacity):
            # Choose (put that person in that timeslot)
            schedule_obj.add(person, time)
            # Explore (assign everyone else to timeslots based on that decision)
            find_all_schedules(everyone_else, availability, schedule_obj, capacity, schedules, max_list)
            # Unchoose (remove that person from the timeslot)
            schedule_obj.remove(person, time)
        else:
            # NOTE: this will not generate a full timeslot, but could still lead to a good schedule
            find_all_schedules(everyone_else, availability, schedule_obj, capacity, schedules, max_list)


def _format_instructor(person, descrip_dict: dict) -> str:
    """Return 'Name (Workshop)' for one instructor, or just the name if they have no description."""
    descrip = descrip_dict.get(person, "Workshop")
    # A blank cell in the csv shows up as NaN rather than as an empty string
    if not is_defined(descrip):
        return f"{person}"
    descrip = str(descrip)
    if len(descrip) == 0:
        return f"{person}"
    descrip = descrip.replace(DELIMITER, " OR ")
    return f"{person} ({descrip})"


def make_df(schedules: list, descrip_dict: dict):
    """Make an organized DataFrame given a list of schedules.

    schedules    -- list of dicts mapping a timeslot to its list of instructors;
                    repeated schedules are only shown once
    descrip_dict -- dict mapping an instructor to their workshop description;
                    if it is empty, only the names are listed

    Returns a tuple (DataFrame, number of distinct schedules shown).
    """
    all_times = []
    all_instructors = []
    seen = set()
    count = 0
    for curr_sched in schedules:
        # Sort dictionary by keys
        sorted_items = sorted(curr_sched.items(), key=lambda item: item[0])
        # Hashable snapshot of the schedule, so duplicates are found with a
        # set lookup instead of comparing against every earlier dict
        fingerprint = tuple((slot, tuple(instructors)) for slot, instructors in sorted_items)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)

        # Include an empty row between schedules
        if count > 0:
            all_times.append("")
            all_instructors.append("")
        count += 1
        all_times.append(f"Schedule #{count}")
        all_instructors.append("")

        for slot, instructors in sorted_items:
            all_times.append(slot)
            if len(descrip_dict) == 0:
                all_instructors.append("; ".join(instructors))
            else:
                # The format will be: Time: Instructor (Workshop); Instructor (Workshop)
                all_instructors.append(
                    "; ".join(_format_instructor(person, descrip_dict) for person in instructors)
                )

    new_df = pd.DataFrame({
        "Schedule": all_times,
        "Instructor(s)": all_instructors
    })
    return new_df, count


def get_var_name(var, default):
    """Return the stripped version of the column name.

    Falls back to default if the user didn't input a column name (None, empty,
    or only whitespace).
    """
    if var is None:
        return default
    stripped = var.strip()
    if len(stripped) == 0:
        return default
    return stripped


def _output_path(filename: str) -> str:
    """Return <current working directory>/schedules/<filename>, creating the folder if needed."""
    folder = os.path.abspath(os.getcwd()) + "/schedules"
    # to_csv does not create missing folders, so make sure this one exists
    os.makedirs(folder, exist_ok=True)
    return folder + "/" + filename


def error_msg(message: str):
    """Return an error message, a placeholder DataFrame, and the path of the csv written from it.

    The csv is written to schedules/ERROR.csv under the current working directory.
    """
    empty = pd.DataFrame({"Schedule": ["ERROR"], "Instructor": ["ERROR"]})
    path = _output_path("ERROR.csv")
    empty.to_csv(path, index=False)
    return "ERROR: " + message, empty, path


def find_missing_cols(df_columns: list, names: list, file: str) -> str:
    """Describe the column names that aren't in the csv file.

    df_columns -- the column names present in the uploaded file
    names      -- the column names that are required
    file       -- label for the uploaded file, used in the message

    Returns "" if nothing is missing, otherwise a message for the user.
    """
    missing = [elem for elem in names if elem not in df_columns]
    if len(missing) == 0:
        return ""

    double_check = (
        f'These are the columns in your file: {"; ".join(df_columns)}. '
        "Please double check your spelling/punctuation and try again."
    )
    if len(missing) == 1:
        which = "this column"
        listing = missing[0]
    elif len(missing) == 2:
        which = "these columns"
        listing = f"{missing[0]} and {missing[1]}"
    else:
        which = "these columns"
        listing = ", ".join(missing[:-1]) + ", and " + missing[-1]
    return f"I cannot find {which} in the {file} file you uploaded: {listing}. {double_check}"


def get_description_dict(df):
    """Make a dictionary mapping each instructor's name to the workshop(s) they're teaching."""
    # zip over the columns so the DataFrame's index labels don't matter
    return dict(zip(df[NAME_COL], df[DESCRIP_COL]))


def classify_schedules(people: list, schedules: list, partial_names: list, total_timeslots: int, max_timeslots_filled: int) -> tuple:
    """Classify schedules into two categories: complete and incomplete.

    Complete = everyone is teaching their desired number of workshops and each
    timeslot is filled. Incomplete = not complete.
    NOTE: "valid" is used instead of "complete" as a variable name.

    Only schedules whose num_timeslots_filled equals max_timeslots_filled are
    considered. Returns (valid_schedules, []) if at least one schedule is
    complete, otherwise ([], incomplete_schedules); both lists are empty when
    no schedule was considered.
    """
    # Key: person
    # Value: number of workshops they WANT to teach
    pref_dict = Counter(people)
    pref_dict.update(Counter(partial_names))

    valid_schedules = []
    incomplete_schedules = []
    for sched in schedules:
        if sched.num_timeslots_filled != max_timeslots_filled:
            continue

        # Key: person
        # Value: how many workshops they're ACTUALLY teaching in this schedule
        freq_dict = Counter()
        for instructor_list in sched.timeslots.values():
            for instructor in instructor_list:
                if instructor in pref_dict:
                    freq_dict[instructor] += 1
                else:
                    # Someone is scheduled who never asked to teach
                    print("there is a serious issue!!!!")

        # See if everyone is teaching their desired number of workshops
        everyone_is_teaching = all(
            freq_dict[teacher] == wanted for teacher, wanted in pref_dict.items()
        )
        filled_all_timeslots = (sched.num_timeslots_filled == total_timeslots)

        if everyone_is_teaching and filled_all_timeslots:
            valid_schedules.append(sched)
        elif len(valid_schedules) == 0:
            # No need to keep incomplete schedules once there's at least one valid schedule
            incomplete_schedules.append(sched)

    if len(valid_schedules) > 0:
        return valid_schedules, []
    return [], incomplete_schedules


def get_best_schedules(schedules: list, cutoff: str) -> list:
    """Return the timeslot dicts of the schedules with the most workshops.

    schedules -- schedules that have the max number of timeslots filled
    cutoff    -- maximum number of results (converted with int()); -1 means
                 return all of them

    To make it less overwhelming, at most cutoff of the best schedules are
    returned, picked randomly. An empty input gives an empty list.
    """
    cutoff = int(cutoff)
    if len(schedules) == 0:
        return []

    overall_max = max(sched.total_num_workshops for sched in schedules)
    all_best_schedules = [
        sched.timeslots for sched in schedules
        if sched.total_num_workshops == overall_max
    ]
    if cutoff == -1 or len(all_best_schedules) <= cutoff:
        return all_best_schedules
    # Sample without replacement
    return random.sample(all_best_schedules, cutoff)


def main(df, capacity:int, num_results: int):
    """Big wrapper function that calls the other functions.

    df          -- DataFrame with the NAME_COL, NUM_WORKSHOPS_COL, AVAIL_COL
                   and DESCRIP_COL columns
    capacity    -- maximum number of instructors per timeslot
    num_results -- maximum number of schedules to show (-1 for all)

    Returns a tuple (message, DataFrame of schedules, path of the csv file).
    The csv is written to schedules/schedule.csv under the current working
    directory.
    """
    descrip_dict = get_description_dict(df)

    # Convert the df with everyone's availability to a usable format
    people, availability = convert_df(df)
    partial_names = []

    timeslots = initialize_timeslots(df)
    schedules = []
    schedule_obj = Schedule(timeslots)
    max_list = [0]
    find_all_schedules(people, availability, schedule_obj, capacity, schedules, max_list)

    total_timeslots = len(timeslots)
    valid_schedules, decent_schedules = classify_schedules(
        people, schedules, partial_names, total_timeslots, max_list[0]
    )

    # Return schedules
    found_complete = len(valid_schedules) > 0
    candidates = valid_schedules if found_complete else decent_schedules
    best_schedules = get_best_schedules(candidates, num_results)
    new_df, count = make_df(best_schedules, descrip_dict)

    if found_complete:
        if count == 1:
            results = "Good news! I was able to make a schedule."
        else:
            results = "Good news! I was able to make multiple schedules."
    else:
        beginning = "Unfortunately, I wasn't able to make a complete schedule, but here"
        if count == 1:
            results = f"{beginning} is the best option."
        else:
            results = f"{beginning} are the best options."

    path = _output_path("schedule.csv")
    new_df.to_csv(path, index=False)
    return results, new_df, path