import pandas as pd 
import copy
import os 
import gradio as gr
from collections import Counter
import random

# CONSTANTS
# Column labels looked up in the availability DataFrame
NAME_COL = 'Juggler_Name'  # instructor's name
NUM_WORKSHOPS_COL = 'Num_Workshops'  # how many workshops that instructor wants to teach
AVAIL_COL = 'Availability'  # DELIMITER-separated list of timeslots the instructor can teach
DESCRIP_COL = 'Workshop_Descriptions'  # workshop description(s) shown next to the instructor's name
# Separator between entries inside a single cell (availability and descriptions)
DELIMITER = ';'

class Schedule:
    """A timeslot -> instructors table with running totals.

    The dict passed to the constructor is stored by reference (not copied),
    so changes made through ``add``/``remove`` are visible to whoever else
    holds that dict.
    """

    def __init__(self, timeslots: dict):
        """Wrap ``timeslots`` and count what is already scheduled in it.

        Args:
            timeslots: maps each timeslot label to the list of people
                teaching in it (lists may be empty).
        """
        # Number of timeslots that have at least one instructor
        self.num_timeslots_filled = 0
        # Total number of (instructor, timeslot) assignments
        self.total_num_workshops = 0

        for instructors in timeslots.values():
            if len(instructors) > 0:
                self.num_timeslots_filled += 1
                self.total_num_workshops += len(instructors)

        self.timeslots = timeslots

    def add(self, person: str, time: str):
        """Assign ``person`` to ``time``.

        Raises:
            KeyError: if ``time`` is not a known timeslot. The counters are
                left untouched in that case.
        """
        # Look the slot up first so a bad key cannot desynchronize the counters
        slot = self.timeslots[time]
        if len(slot) == 0:
            self.num_timeslots_filled += 1
        slot.append(person)
        self.total_num_workshops += 1

    def remove(self, person: str, time: str):
        """Remove one occurrence of ``person`` from ``time``.

        Raises:
            KeyError: if ``time`` is not a known timeslot.
            ValueError: if ``person`` is not in that timeslot.
            In both cases the counters are left untouched.
        """
        slot = self.timeslots[time]
        # Remove before touching the counters: list.remove raises if absent
        slot.remove(person)
        self.total_num_workshops -= 1
        if len(slot) == 0:
            self.num_timeslots_filled -= 1


def can_teach(person: str, slot: list, capacity: int) -> bool:
    """Return True if ``person`` can be added to ``slot``, False otherwise.

    ``slot`` is the list of people already teaching in the timeslot and
    ``capacity`` is the most people it may hold.
    """
    # A slot at (or beyond) capacity takes nobody else
    if len(slot) >= capacity:
        return False

    # No one can teach two workshops at once
    return person not in slot


def convert_df(df):
    """Extract the people list and availability table from the DataFrame.

    Rows are read positionally, so the DataFrame does not need a default
    0..n-1 index.

    Args:
        df: DataFrame with the NAME_COL, NUM_WORKSHOPS_COL and AVAIL_COL
            columns.

    Returns:
        A ``(people, availability)`` tuple:
        people: list of names in which each name appears once per workshop
            that person wants to teach.
        availability: dict mapping a name to the list of timeslots (stripped
            of surrounding whitespace) from that person's AVAIL_COL cell.
            If a name appears in several rows, the last row wins.
    """
    people = []
    availability = {}

    rows = zip(df[NAME_COL], df[NUM_WORKSHOPS_COL], df[AVAIL_COL])
    for name, number, raw_avail in rows:
        # TODO: make sure no people with the same name fill out the form

        # People teaching multiple workshops are listed more than once.
        # int() lets whole-number floats such as 2.0 through; fractional
        # values are truncated.
        people.extend([name] * int(number))

        availability[name] = [slot.strip() for slot in raw_avail.split(DELIMITER)]

    return people, availability


def is_defined(curr):
    """Return False if ``curr`` is NaN, and True otherwise."""
    # NaN is the only value that compares unequal to itself
    return not (curr != curr)
    
def is_valid(curr):
    """Return True if ``curr`` is defined (not NaN) and has a length above 0."""
    if not is_defined(curr):
        return False
    return len(curr) > 0

def initialize_timeslots(df) -> dict:
    """Build a dictionary with one key per distinct timeslot.

    The timeslots are collected from every AVAIL_COL cell, split on
    DELIMITER and stripped of surrounding whitespace. Each key maps to its
    own new, empty list.
    """
    distinct_slots = {
        piece.strip()
        for cell in df[AVAIL_COL]
        for piece in cell.split(DELIMITER)
    }
    return {slot: [] for slot in distinct_slots}


def find_all_schedules(people: list, availability: dict, schedule_obj: Schedule, capacity: int, schedules: list, max_list: list) -> None:
    """Recursively explore schedules, recording the promising ones.

    Args:
        people: names still to be placed; the first one is handled by this
            call and the rest by the recursive calls.
        availability: maps a name to the list of timeslots they can teach.
        schedule_obj: the schedule being built. It is modified in place while
            exploring and every assignment is undone again afterwards.
        capacity: maximum number of people per timeslot.
        schedules: output list. A deep copy of ``schedule_obj`` is appended
            whenever its filled-timeslot count is at least the best seen so
            far, so earlier entries may have a lower count than later ones.
        max_list: one-element list holding the best filled-timeslot count
            seen so far; updated in place.
    """
    filled = schedule_obj.num_timeslots_filled
    if filled >= max_list[0]:
        schedules.append(copy.deepcopy(schedule_obj))
        max_list[0] = filled

    # Base case: nobody left to place
    if not people:
        return

    # Recursive cases
    person = people[0]
    remaining = people[1:]

    for time in availability[person]:
        placed = can_teach(person, schedule_obj.timeslots[time], capacity)

        # Choose (put that person in that timeslot)
        if placed:
            schedule_obj.add(person, time)

        # Explore (assign everyone else to timeslots based on that decision)
        # NOTE: when the person could not be placed we still explore without
        # them; this will not generate a full timeslot, but could still lead
        # to a good schedule
        find_all_schedules(remaining, availability, schedule_obj, capacity, schedules, max_list)

        # Unchoose (remove that person from the timeslot)
        if placed:
            schedule_obj.remove(person, time)


# Formats one instructor as "Name (Workshop description)", or just "Name"
# when that person's description is blank
def _format_instructor(person, descrip_dict: dict) -> str:
    descrip = descrip_dict.get(person, "Workshop")
    if not isinstance(descrip, str):
        # Blank cells arrive as NaN (a float), which has no len()
        descrip = "" if descrip is None or descrip != descrip else str(descrip)
    if len(descrip) > 0:
        descrip = descrip.replace(DELIMITER, " OR ")
        return f"{person} ({descrip})"
    return f"{person}"


def make_df(schedules: list, descrip_dict: dict):
    """Make an organized DataFrame given a list of schedules.

    Each distinct schedule becomes a "Schedule #k" header row followed by one
    row per timeslot (sorted by timeslot label), with an empty row between
    consecutive schedules. Repeated schedules are written only once.

    Args:
        schedules: list of dicts mapping a timeslot to its list of
            instructors.
        descrip_dict: maps an instructor to their workshop description. If it
            is empty, only names are written. Otherwise each instructor is
            written as "Name (Description)"; people missing from the dict get
            the description "Workshop" and people with a blank description
            are written as just their name.

    Returns:
        A ``(DataFrame, count)`` tuple where the DataFrame has the columns
        "Schedule" and "Instructor(s)" and ``count`` is the number of
        distinct schedules written.
    """
    all_times = []
    all_instructors = []
    # Hashable fingerprints of the schedules already written
    seen = set()

    for curr_sched in schedules:
        # Sort by timeslot label
        ordered = sorted(curr_sched.items(), key=lambda item: item[0])

        fingerprint = tuple((slot, tuple(instructors)) for slot, instructors in ordered)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)

        # Include an empty row between schedules
        if len(seen) > 1:
            all_times.append("")
            all_instructors.append("")

        all_times.append(f"Schedule #{len(seen)}")
        all_instructors.append("")

        for slot, instructors in ordered:
            all_times.append(slot)
            if len(descrip_dict) == 0:
                all_instructors.append("; ".join(instructors))
            else:
                # The format will be: Instructor (Workshop); Instructor (Workshop)
                all_instructors.append(
                    "; ".join(_format_instructor(person, descrip_dict) for person in instructors)
                )

    new_df = pd.DataFrame({
        "Schedule": all_times,
        "Instructor(s)": all_instructors
    })

    return new_df, len(seen)


def get_var_name(var, default):
    """Return the stripped column name, or ``default`` if none was given.

    ``None``, an empty string and a whitespace-only string all count as
    "no column name given".
    """
    if var is None:
        return default
    stripped = var.strip()
    # Strip first so that input like "   " falls back to the default
    return stripped if stripped else default


def error_msg(message: str):
    """Build the error result returned in place of a schedule.

    Writes a one-row placeholder DataFrame to ``schedules/ERROR.csv`` under
    the current working directory, creating the ``schedules`` directory if it
    does not exist yet.

    Returns:
        A tuple of ("ERROR: " + message, the placeholder DataFrame, the path
        of the csv file that was written).
    """
    empty = pd.DataFrame({"Schedule": ["ERROR"], "Instructor": ["ERROR"]})
    out_dir = os.path.join(os.path.abspath(os.getcwd()), "schedules")
    # to_csv does not create missing parent directories
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "ERROR.csv")
    empty.to_csv(path, index=False)
    return "ERROR: " + message, empty, path


def find_missing_cols(df_columns: list, names: list, file: str) -> str:
    """Describe which of ``names`` are not among ``df_columns``.

    Args:
        df_columns: the column labels present in the file.
        names: the column labels that are required.
        file: a word describing the file, inserted into the message.

    Returns:
        An empty string if nothing is missing; otherwise a sentence listing
        the missing column(s), followed by the list of columns that are
        present.
    """
    missing = [elem for elem in names if elem not in df_columns]
    if len(missing) == 0:
        return ""

    # Column labels are not guaranteed to be strings, so convert before joining
    present = "; ".join(str(col) for col in df_columns)
    double_check = f"""These are the columns in your file: {present}. Please double check your spelling/punctuation and try again."""

    if len(missing) == 1:
        noun = "this column"
        listing = f"{missing[0]}"
    elif len(missing) == 2:
        noun = "these columns"
        listing = f"{missing[0]} and {missing[1]}"
    else:
        noun = "these columns"
        leading = ", ".join(str(col) for col in missing[:-1])
        listing = f"{leading}, and {missing[-1]}"

    return f"I cannot find {noun} in the {file} file you uploaded: {listing}. {double_check}"
    

def get_description_dict(df):
    """Map each instructor's name to the workshop(s) they're teaching.

    Names come from the NAME_COL column and values from the DESCRIP_COL
    column of the same row. Rows are paired positionally, so the DataFrame
    does not need a default 0..n-1 index. If a name appears in several rows,
    the last row wins.
    """
    return dict(zip(df[NAME_COL], df[DESCRIP_COL]))


def classify_schedules(people: list, schedules: list, partial_names: list, total_timeslots: int, max_timeslots_filled: int) -> tuple:
    """Classify schedules into two categories: complete and incomplete.

    Complete = everyone is teaching their desired number of workshops and
    every timeslot is filled. Incomplete = not complete.
    NOTE: "valid" is used instead of "complete" as a variable name.

    Only schedules whose ``num_timeslots_filled`` equals
    ``max_timeslots_filled`` are considered; the rest are ignored.

    Args:
        people: names, repeated once per workshop the person wants to teach.
        schedules: objects exposing ``num_timeslots_filled`` and
            ``timeslots`` (timeslot -> list of instructors).
        partial_names: further names, counted on top of ``people`` when
            working out how many workshops each person wants.
        total_timeslots: how many timeslots exist in total.
        max_timeslots_filled: the filled-timeslot count a schedule must have
            to be considered.

    Returns:
        ``(valid_schedules, [])`` if at least one complete schedule exists,
        otherwise ``([], incomplete_schedules)``. Both lists are empty when
        no schedule is considered at all.
    """
    # Key: person
    # Value: number of workshops they WANT to teach
    pref_dict = Counter(people)
    pref_dict.update(Counter(partial_names))

    valid_schedules = []
    incomplete_schedules = []

    for sched in schedules:
        if sched.num_timeslots_filled != max_timeslots_filled:
            continue

        # Key: person
        # Value: how many workshops they're ACTUALLY teaching in this schedule
        freq_dict = Counter()
        for instructor_list in sched.timeslots.values():
            for instructor in instructor_list:
                if instructor in pref_dict:
                    freq_dict[instructor] += 1
                else:
                    # Someone is scheduled who never asked to teach
                    print("there is a serious issue!!!!")

        # See if everyone is teaching their desired number of workshops
        everyone_is_teaching = all(
            freq_dict[teacher] == wanted for teacher, wanted in pref_dict.items()
        )
        filled_all_timeslots = (sched.num_timeslots_filled == total_timeslots)

        if everyone_is_teaching and filled_all_timeslots:
            valid_schedules.append(sched)
        elif len(valid_schedules) == 0:
            # No need to keep incomplete schedules once a valid one exists
            incomplete_schedules.append(sched)

    if len(valid_schedules) > 0:
        return valid_schedules, []
    return [], incomplete_schedules


def get_best_schedules(schedules: list, cutoff: str) -> list:
    """Return the timeslot tables of the schedules with the most workshops.

    Args:
        schedules: objects exposing ``total_num_workshops`` and
            ``timeslots``.
        cutoff: maximum number of results, as an int or a string holding an
            int. ``-1`` means "return all of them". If there are more best
            schedules than ``cutoff``, that many are picked at random
            (without replacement) to keep the output less overwhelming.

    Returns:
        A list of ``timeslots`` values; empty if ``schedules`` is empty.
    """
    cutoff = int(cutoff)
    if not schedules:
        return []

    overall_max = max(sched.total_num_workshops for sched in schedules)
    all_best_schedules = [
        sched.timeslots for sched in schedules
        if sched.total_num_workshops == overall_max
    ]

    if cutoff != -1 and len(all_best_schedules) > cutoff:
        # Sample without replacement
        return random.sample(all_best_schedules, cutoff)
    return all_best_schedules


def main(df, capacity: int, num_results: int):
    """Big wrapper function that calls the other functions.

    Builds schedules from the availability DataFrame, keeps the best ones,
    and writes them to ``schedules/schedule.csv`` under the current working
    directory (the ``schedules`` directory is created if it is missing).

    Args:
        df: DataFrame with the name, number-of-workshops, availability and
            description columns.
        capacity: maximum number of people teaching in one timeslot.
        num_results: maximum number of schedules to return (-1 for all).

    Returns:
        A tuple of (result message, DataFrame of schedules, path of the csv
        file that was written).
    """
    descrip_dict = get_description_dict(df)

    # Convert the df with everyone's availability to a usable format
    people, availability = convert_df(df)

    # No partial schedule is passed in here, so nobody is pre-assigned
    partial_names = []

    timeslots = initialize_timeslots(df)
    schedule_obj = Schedule(timeslots)

    schedules = []
    max_list = [0]
    find_all_schedules(people, availability, schedule_obj, capacity, schedules, max_list)

    total_timeslots = len(timeslots)
    valid_schedules, decent_schedules = classify_schedules(
        people, schedules, partial_names, total_timeslots, max_list[0]
    )

    # Prefer complete schedules; otherwise fall back to the best incomplete ones
    found_complete = len(valid_schedules) > 0
    candidates = valid_schedules if found_complete else decent_schedules
    best_schedules = get_best_schedules(candidates, num_results)
    new_df, count = make_df(best_schedules, descrip_dict)

    if found_complete:
        if count == 1:
            results = "Good news! I was able to make a schedule."
        else:
            results = "Good news! I was able to make multiple schedules."
    else:
        beginning = "Unfortunately, I wasn't able to make a complete schedule, but here"
        if count == 1:
            results = f"{beginning} is the best option."
        else:
            results = f"{beginning} are the best options."

    out_dir = os.path.join(os.path.abspath(os.getcwd()), "schedules")
    # to_csv does not create missing parent directories
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "schedule.csv")
    new_df.to_csv(path, index=False)
    return results, new_df, path