File size: 2,196 Bytes
60e8b74
53950c0
 
 
60e8b74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303845a
60e8b74
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import numpy as np
import gradio as gr
import pandas as pd
from zipfile import ZipFile

def zip_two_files(data1, data2):
  """Bundle two data frames into a zip archive of CSV files.

  Args:
    data1: Frame stored in the archive as ``primary_data.csv``.
    data2: Frame stored in the archive as ``secondary_data.csv``.

  Returns:
    The archive's path, ``'my_csvs.zip'``, relative to the current working
    directory. Any existing file of that name is overwritten.
  """
  archive_path = 'my_csvs.zip'
  members = (
    ("primary_data.csv", data1),
    ("secondary_data.csv", data2),
  )
  with ZipFile(archive_path, 'w') as archive:
    for member_name, frame in members:
      # The row index is left out so only real data columns reach the CSV.
      archive.writestr(member_name, frame.to_csv(index=False))
  return archive_path

def _resolve_columns(raw_names, available):
  """Clean up user-typed column names, keeping their order.

  A name that matches a column in *available* exactly is kept verbatim;
  otherwise surrounding whitespace is stripped (so "a, b" works). Blank
  entries and repeated names are dropped.
  """
  resolved = []
  for raw in raw_names:
    raw = raw or ''
    name = raw if raw in available else raw.strip()
    if name and name not in resolved:
      resolved.append(name)
  return resolved


def get_split(csv_file, target_columns, primary_cols, combination_of):
  """Split a CSV into a primary and a secondary table linked by ``Id_Apres``.

  Rows are grouped by the string form of the values in the ``combination_of``
  columns. Each distinct combination gets an id (1, 2, ... in order of first
  appearance). The secondary table holds one row per id with every column
  that is not a primary/target column; the primary table holds the primary
  and target columns of every row plus the id of its combination.

  Args:
    csv_file: Object whose ``name`` attribute is the path of the CSV to read.
    target_columns: Name of a single target column (not comma-split).
    primary_cols: Comma-separated names of the primary columns.
    combination_of: Comma-separated names of the columns whose combined
      values identify a secondary row.

  Returns:
    Path of the zip archive produced by ``zip_two_files``.

  Raises:
    ValueError: If ``combination_of`` contains no column name.
    KeyError: If a requested column does not exist in the CSV.
  """
  df = pd.read_csv(csv_file.name, delimiter=",")
  available = set(df.columns)

  # The target is a single column name, so it is deliberately not split.
  target_columns = _resolve_columns([target_columns], available)
  primary_cols = _resolve_columns(
    (primary_cols or '').split(',') + target_columns, available)
  combination_of = _resolve_columns((combination_of or '').split(','), available)
  if not combination_of:
    raise ValueError("At least one 'Combination of Column' name is required.")

  # Keep the CSV's own column order; a set difference would make the order
  # of the secondary CSV's columns arbitrary from run to run.
  secondary_cols = [col for col in df.columns if col not in primary_cols]

  # Comma-joined string key per row, built column-wise instead of with a
  # row-wise Python lambda.
  combo_key = df[combination_of[0]].astype(str)
  for col in combination_of[1:]:
    combo_key = combo_key + "," + df[col].astype(str)

  # factorize numbers keys in order of first appearance, so the first
  # occurrence rows carry ids 1, 2, 3, ... in that same order.
  codes, _ = pd.factorize(combo_key)
  ids = codes + 1
  first_seen = ~combo_key.duplicated()

  secondary_df = df.loc[first_seen, secondary_cols].reset_index(drop=True)
  secondary_df.insert(0, 'Id_Apres', ids[first_seen.to_numpy()])

  # Ids are assigned per row directly rather than by merging back on the raw
  # combination columns, so no row can be lost in the join.
  primary_df = df[primary_cols].copy()
  primary_df['Id_Apres'] = ids
  # reset_index() without drop=True is kept on purpose: the primary CSV has
  # always carried the row position in a leading "index" column.
  primary_df = primary_df.reset_index()

  return zip_two_files(primary_df, secondary_df)



# Web form for get_split. The inputs are listed in the same order as
# get_split's parameters: the uploaded CSV first, then the three free-text
# column selectors.
iface = gr.Interface(
    fn=get_split,
    inputs=[gr.inputs.File(label='CSV file')] + [
        gr.inputs.Textbox(label=caption)
        for caption in ('Target Column', 'Primary Column', 'Combination of Column')
    ],
    # get_split returns the path of a zip archive, so the only output is a
    # file download (rather than two rendered data frames).
    outputs=['file'],
    title='Data Splitter ',
    description="Split your data into 2 parts. Apres.io © 2022 All rights reserved.",
)

iface.launch(debug=True)