# Data Play Ground — Streamlit app: upload CSV files and query them with SQL
# via pandasql. (Removed scraped page residue — hosting UI text, file size,
# commit hash, and a pasted line-number gutter — that was not part of the code.)
import os
import sys
import io
import re
import base64
import streamlit as st
import pandas as pd
import pandasql as psql
################################
######### Variables ############
################################
# -- Loading Variables
# Directory containing this script; used to locate the bundled data/ folder.
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# Registry schema: one row per uploaded CSV — its display name and DataFrame.
file_details = pd.DataFrame(columns=['file_name', 'data'])
# -- Loading Session Data
# Each key is initialized only once per browser session; Streamlit re-runs the
# whole script on every interaction, so these guards preserve state across runs.
if 'project_data' not in st.session_state:
    # NOTE(review): assumes data/project.csv exists next to the script — confirm.
    st.session_state.project_data = pd.read_csv(script_directory+'/data/project.csv')
if 'global_dataframe' not in st.session_state:
    # Registry of uploaded files (empty until "Load Data" is pressed).
    st.session_state.global_dataframe=file_details
if 'load_sql' not in st.session_state:
    # True while the "Load Data" view is active.
    st.session_state.load_sql=False
if 'run_sql' not in st.session_state:
    # True while the SQL editor view is active.
    st.session_state.run_sql=False
################################
####### GenericFunctions #######
################################
# -- Create Dynamic Columns
def generate_column_names(end):
    """Return ``end + 1`` dynamic column names: ``Col1`` .. ``Col{end+1}``.

    ``end`` is the separator count per row, so the number of fields —
    and therefore column names — is one more than ``end`` (hence the
    ``end + 2`` upper bound of the range).

    Args:
        end: Number of separators per row; must be at least 1.

    Returns:
        List of column-name strings.

    Raises:
        ValueError: if ``end`` is less than 1.
    """
    if end < 1:
        # The old message ("must be grater than 1") was both misspelled and
        # wrong about the bound — the guard rejects values below 1, not 1.
        raise ValueError("End value must be at least 1")
    return [f"Col{i}" for i in range(1, end + 2)]
# -- Add missing separator
def add_missing_separators(file_data, separator, max_header_count):
    """Pad each line so every row has at least ``max_header_count`` separators.

    Args:
        file_data: Iterable of text lines (e.g. an ``io.StringIO``).
        separator: Single-character field separator (e.g. ``","``).
        max_header_count: Target separator count per row.

    Returns:
        List of lines, each stripped of its line ending and padded with
        trailing separators up to ``max_header_count``.
    """
    modified_rows = []
    for line in file_data:
        # BUGFIX: lines iterated from a file/StringIO keep their trailing
        # newline, so the old code appended the padding AFTER the "\n" and
        # the newline leaked into the last field when callers split the row.
        line = line.rstrip("\r\n")
        count = line.count(separator)
        if count < max_header_count:
            line += separator * (max_header_count - count)
        modified_rows.append(line)
    return modified_rows
# -- Create global dataframes
def create_global_df(sep=",", usecols=None, max_header_count=1):
    """Parse every uploaded CSV and store the registry in session state.

    Reads the module-level ``uploaded_files`` widget value. Each file becomes
    one row in a registry DataFrame (``file_name``, ``data``), which is saved
    to ``st.session_state.global_dataframe``.

    Args:
        sep: Field separator used to parse the files.
        usecols: Column names to assign when parsing header-less files;
            ``None`` means let ``pd.read_csv`` infer headers from the file.
        max_header_count: Separator count rows are padded to (header-less
            mode only).

    Raises:
        Re-raises any parsing error after showing it via ``st.error``.
    """
    file_details = pd.DataFrame(columns=['file_name', 'data'])
    try:
        if uploaded_files is not None:
            for file in uploaded_files:
                if usecols is not None:
                    # Header-less mode: pad ragged rows, then split manually.
                    file_data = io.StringIO(file.read().decode())
                    modified_rows = add_missing_separators(file_data, sep, max_header_count)
                    df = pd.DataFrame(each_row.split(sep) for each_row in modified_rows)
                    df.columns = usecols
                else:
                    df = pd.read_csv(file, sep=sep)
                # Derive the SQL table name from the file name. BUGFIX: the
                # old code called match.group(1) unconditionally, which would
                # raise AttributeError if the regex did not match; fall back
                # to the name minus its extension instead.
                match = re.search(r'([^/]+)\.csv$', file.name)
                file_name = match.group(1) if match else os.path.splitext(file.name)[0]
                file_details.loc[len(file_details)] = {
                    'file_name': file_name,
                    'data': df
                }
        st.session_state.global_dataframe = file_details
    except Exception as e:
        st.error(f"Error processing csv: {str(e)}")
        raise e
# -- Load global dataframes
def load_global_df():
    """Load the uploaded CSVs, choosing header handling from session state."""
    if not st.session_state.header:
        # No synthetic headers requested — let pandas infer them.
        print("No Headers Added")
        create_global_df(sep)
        return
    # Synthetic headers: generate Col1..ColN names sized to the padded rows.
    print("Added Headers")
    header_count = st.session_state.header_count
    create_global_df(sep, generate_column_names(header_count), header_count)
# -- Run SQL Data
def run_sql_df():
    """Render the SQL editor and run the query over the loaded tables."""
    # Publish every loaded DataFrame under its file name at module level so
    # pandasql can resolve table names in the query through globals().
    for _, record in st.session_state.global_dataframe.iterrows():
        globals()[record['file_name']] = record['data']
    try:
        query = st.text_area(label="Sql Query", value="", key="sql_query", height=200)
        if st.button("Run SQL Query"):
            result_df = psql.sqldf(query, globals())
            st.write("Query Result")
            st.dataframe(result_df)
            # Offer the result as a base64 data-URI download link.
            encoded = base64.b64encode(result_df.to_csv(index=False).encode()).decode()
            st.markdown(
                f'<a href="data:file/csv;base64,{encoded}" download="result.csv">Download Result CSV</a>',
                unsafe_allow_html=True,
            )
    except Exception as e:
        st.error(f"Error executing SQL query: {str(e)}")
################################
####### Display of data ########
################################
# -- Streamlit Settings
# Widget order below defines the page layout — do not reorder these calls.
st.set_page_config(layout='wide')
st.title("Data Play Ground")
# -- Delimiter
st.text("")
st.text("")
st.text("")
col1, col2, col3 = st.columns(3)
# Separator used when parsing uploaded files; stored under the
# "delimiter" session key and read back below as `sep`.
delimiter = col1.selectbox(
    label="File Delimiter",
    options=[",","|"],
    key="delimiter"
)
# -- Upload Sample Files
st.text("")
st.text("")
col1, col2, col3, col4 = st.columns([1,0.3,0.7,1])
# Multi-file CSV uploader; read by create_global_df via the module global.
uploaded_files = col1.file_uploader(
    "Choose a file",
    type="csv",
    key="uploaded_files",
    accept_multiple_files=True
)
# -- Add header Indicator
# When checked, synthetic Col1..ColN headers are generated instead of
# inferring them from the files.
header=col3.checkbox(
    label='Add Header',
    key="header"
)
# -- Dynamic Headers Count
# Only shown when the header checkbox is on; "No of Header" is the
# separator count rows are padded to (columns = count + 1).
if header:
    header_count=col4.number_input(
        label="No of Header",
        value=2,
        key="header_count",
        min_value=1,
        max_value=100,
        step=1
    )
# -- Load Data
st.text("")
col1, col2, col3 = st.columns([1,1,8])
# Delimiter chosen above; module-level so load_global_df/create_global_df see it.
sep = st.session_state.delimiter
if col1.button("Load Data"):
    st.session_state.load_sql = True
    st.session_state.run_sql = False
    load_global_df()
# -- Run SQL Query
# BUGFIX: the old handler also called run_sql_df() here, and the
# `if st.session_state.run_sql` branch below then called it a second time in
# the same script run — creating a duplicate "sql_query" widget key. Setting
# the flags is enough; the display branch renders the editor.
if col2.button("SQL"):
    st.session_state.load_sql = False
    st.session_state.run_sql = True
# -- Display SQL Query Data
if st.session_state.run_sql:
    run_sql_df()
# -- Display Loaded Data
if (len(st.session_state.global_dataframe) > 0 and st.session_state.load_sql):
    col1, col2, col3 = st.columns(3)
    col1.selectbox(
        label="Select Table Name",
        key="table_name",
        options=st.session_state.global_dataframe['file_name']
    )
    # Publish each DataFrame under its file name so pandasql can resolve it.
    for index, row in st.session_state.global_dataframe.iterrows():
        globals()['%s' % row['file_name']] = row['data']
    # BUGFIX: removed a stray trailing "|" (scrape artifact) that made this
    # line a syntax error.
    st.dataframe(psql.sqldf("select * from "+st.session_state.table_name, globals()))