Spaces:
Sleeping
Sleeping
import os | |
import sys | |
import io | |
import re | |
import base64 | |
import streamlit as st | |
import pandas as pd | |
import pandasql as psql | |
################################ | |
######### Variables ############ | |
################################ | |
# -- Loading Variables | |
script_directory = os.path.dirname(os.path.abspath(sys.argv[0])) | |
file_details = pd.DataFrame(columns=['file_name', 'data']) | |
# -- Loading Session Data | |
if 'project_data' not in st.session_state: | |
st.session_state.project_data = pd.read_csv(script_directory+'/data/project.csv') | |
if 'global_dataframe' not in st.session_state: | |
st.session_state.global_dataframe=file_details | |
if 'load_sql' not in st.session_state: | |
st.session_state.load_sql=False | |
if 'run_sql' not in st.session_state: | |
st.session_state.run_sql=False | |
################################ | |
####### GenericFunctions ####### | |
################################ | |
# -- Create Dynamic Columns | |
def generate_column_names(end): | |
if 1 > end: | |
raise ValueError("End value must be grater than 1") | |
column_names = [f"Col{i}" for i in range(1, end+2)] | |
return column_names | |
# -- Add missing separator | |
def add_missing_separators(file_data,separator,max_header_count): | |
# Create a list to hold the modified rows | |
modified_rows = [] | |
for line in file_data: | |
# Count the occurrences of the separator | |
count = line.count(separator) | |
# Append the separator if the count is less than the max_header_count | |
if count < max_header_count: | |
separator_str=separator * (max_header_count - count) | |
line = line + separator_str | |
# Added modified line | |
modified_rows.append(line) | |
return modified_rows | |
# -- Create global dataframes | |
def create_global_df(sep=",", usecols=None, max_header_count=1): | |
file_details = pd.DataFrame(columns=['file_name','data']) | |
try: | |
if uploaded_files is not None: | |
for file in uploaded_files: | |
if usecols is not None: | |
file_data = io.StringIO(file.read().decode()) | |
modified_rows = add_missing_separators(file_data, sep,max_header_count) | |
df = pd.DataFrame(each_row.split(sep) for each_row in modified_rows) | |
df.columns = usecols | |
else: | |
df = pd.read_csv(file, sep=sep) | |
pattern = r'([^/]+)\.csv$' | |
match = re.search(pattern, file.name) | |
file_name = match.group(1) | |
file_details.loc[len(file_details)] = { | |
'file_name':file_name, | |
'data':df | |
} | |
st.session_state.global_dataframe = file_details | |
except Exception as e: | |
st.error(f"Error processing csv: {str(e)}") | |
raise e | |
# -- Load global dataframes | |
def load_global_df(): | |
if st.session_state.header: | |
print("Added Headers") | |
usecols = generate_column_names(st.session_state.header_count) | |
create_global_df(sep,usecols,st.session_state.header_count) | |
else: | |
print("No Headers Added") | |
create_global_df(sep) | |
# -- Run SQL Data | |
def run_sql_df(): | |
for index, row in st.session_state.global_dataframe.iterrows(): | |
globals()['%s' % row['file_name']] = row['data'] | |
try: | |
sql_query = st.text_area(label="Sql Query", value="", key="sql_query", height=200) | |
if st.button("Run SQL Query"): | |
result_df = psql.sqldf(sql_query, globals()) | |
st.write("Query Result") | |
st.dataframe(result_df) | |
csv_data = result_df.to_csv(index=False) | |
b64 = base64.b64encode(csv_data.encode()).decode() | |
st.markdown(f'<a href="data:file/csv;base64,{b64}" download="result.csv">Download Result CSV</a>', unsafe_allow_html=True) | |
except Exception as e: | |
st.error(f"Error executing SQL query: {str(e)}") | |
################################ | |
####### Display of data ######## | |
################################ | |
# -- Streamlit Settings | |
st.set_page_config(layout='wide') | |
st.title("Data Play Ground") | |
# -- Delimiter | |
st.text("") | |
st.text("") | |
st.text("") | |
col1, col2, col3 = st.columns(3) | |
delimiter = col1.selectbox( | |
label="File Delimiter", | |
options=[",","|"], | |
key="delimiter" | |
) | |
# -- Upload Sample Files | |
st.text("") | |
st.text("") | |
col1, col2, col3, col4 = st.columns([1,0.3,0.7,1]) | |
uploaded_files = col1.file_uploader( | |
"Choose a file", | |
type="csv", | |
key="uploaded_files", | |
accept_multiple_files=True | |
) | |
# -- Add header Indicator | |
header=col3.checkbox( | |
label='Add Header', | |
key="header" | |
) | |
# -- Dynamic Headers Count | |
if header: | |
header_count=col4.number_input( | |
label="No of Header", | |
value=2, | |
key="header_count", | |
min_value=1, | |
max_value=100, | |
step=1 | |
) | |
# -- Load Data | |
st.text("") | |
col1, col2, col3 = st.columns([1,1,8]) | |
sep = st.session_state.delimiter | |
if col1.button("Load Data"): | |
st.session_state.load_sql=True | |
st.session_state.run_sql=False | |
load_global_df() | |
# -- Run SQL Query | |
if col2.button("SQL"): | |
st.session_state.load_sql=False | |
st.session_state.run_sql=True | |
run_sql_df() | |
# -- Display SQL Query Data | |
if st.session_state.run_sql: | |
run_sql_df() | |
# -- Display Loaded Data | |
if (len(st.session_state.global_dataframe)>0 and st.session_state.load_sql): | |
# print("Count of stored files - "+str(len(st.session_state.global_dataframe))) | |
col1, col2, col3 = st.columns(3) | |
col1.selectbox( | |
label="Select Table Name", | |
key="table_name", | |
options=st.session_state.global_dataframe['file_name'] | |
) | |
for index, row in st.session_state.global_dataframe.iterrows(): | |
globals()['%s' % row['file_name']] = row['data'] | |
st.dataframe(psql.sqldf("select * from "+st.session_state.table_name, globals())) |