import sys import os import csv import streamlit as st import pandas as pd from datetime import date import pandasql as psql import base64 ################################ ######### Variables ############ ################################ # -- Loading Variables script_directory = os.path.dirname(os.path.abspath(sys.argv[0])) # -- Loading Session Data if 'project_data' not in st.session_state: st.session_state.project_data = pd.read_csv(script_directory+'/data/project.csv') if 'mapping_df' not in st.session_state: st.session_state.mapping_df = pd.DataFrame(columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"]) ################################ ####### GenericFunctions ####### ################################ # -- Load Mapping File def load_mapping_file(): if 'project_name' in st.session_state: try: # print("project_name - "+st.session_state.project_name) cond = (st.session_state.project_data['Project'] == st.session_state.project_name) file_name = script_directory+'/data/'+str(st.session_state.project_data[cond].Id.values[0])+"_"+st.session_state.project_data[cond].Source.values[0]+"_"+st.session_state.project_data[cond].Destination.values[0]+'.csv' # print("file_name - "+file_name) st.session_state.mapping_df = pd.read_csv(file_name,sep="|",quoting=csv.QUOTE_NONE) except Exception as e: st.session_state.mapping_df = pd.DataFrame(columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"]) st.error(f"Unable to load mapping file - {e}") ################################ ####### Display of data ######## ################################ # -- Streamlit Settings st.set_page_config(layout='wide') st.title("Data Generation") # -- Add Project Dropdown st.text("") st.text("") st.text("") col1, col2, col3 = st.columns(3) project_name = col1.selectbox( 'Select Project', st.session_state.project_data['Project'], key="project_name", on_change=load_mapping_file() ) # -- Upload Data if len(st.session_state.mapping_df)>0: st.text("") st.text("") st.text("") col1, col2, col3 = st.columns(3) cond = (st.session_state.project_data['Project'] == st.session_state.project_name) result = st.session_state.project_data[cond].Source.values[0] with col1: source_data_file = st.file_uploader( "Source data file name - "+str(result)+".csv", type="csv", key="source_data_file", accept_multiple_files=True ) # -- Button Show Data st.text("") st.text("") col1, col2, col3 = st.columns([0.3,0.5,2.2]) if col1.button("Show Data"): if source_data_file is not None: for file in source_data_file: df = pd.read_csv(file) # Update dataframe with Pandas Mapping Fields for index, row in st.session_state.mapping_df.iterrows(): if row['Type'] == 'Pandas': column_name = row['DestinationColumn'] expression = row['Expression'].replace("'", "") df[column_name] = eval(expression) # Creating SQL Statement sql_statement = "SELECT " for index, row in st.session_state.mapping_df.iterrows(): destination_column = row['DestinationColumn'] source_column = row['SourceColumn'] column_type = row['Type'] expression = row['Expression'] if 'Expression' in row else None if column_type == 'Constant': # Create a dummy column with the provided expression sql_statement += str(expression) + ' AS "' + str(destination_column) + '",' elif column_type == 'Pandas': sql_statement += '"' + str(destination_column) + '" AS "' + str(destination_column) + '",' else: # Use the source column as-is sql_statement += '"' + str(source_column) + '" AS "' + str(destination_column) + '",' # Remove the trailing comma and space sql_statement = sql_statement[:-1]+" from df" # st.write(sql_statement+" from df") st.session_state.df = df st.session_state.sql_statement = sql_statement # Display Data st.dataframe(df) # -- Button Generate Data if col2.button("Generate Data"): df = st.session_state.df if len(df) == 0 : st.error("No records available to run query, click on Show Data") else: sql_query = st.text_area(label="Sql Query", value=st.session_state.sql_statement, key="sql_query", height=200) try: result_df = psql.sqldf(sql_query, locals()) st.write("Query Result") st.dataframe(result_df) csv_data = result_df.to_csv(index=False,header=False,sep="|",quoting=csv.QUOTE_NONE) b64 = base64.b64encode(csv_data.encode()).decode() st.markdown(f'Download Result CSV', unsafe_allow_html=True) except Exception as e: st.error(f"Error executing SQL query: {str(e)}")