Commit
·
b3ce2b2
1
Parent(s):
a365da6
Add sample files
Browse files- data/z_animal.csv +11 -0
- data/z_employee.csv +26 -0
- data/z_house.csv +7 -0
- utils/load_csv.py +23 -0
data/z_animal.csv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
AnimalID,CommonName,ScientificName,Class,Order,Family,Habitat,ConservationStatus
|
2 |
+
1,Lion,Panthera leo,Mammalia,Carnivora,Felidae,Savanna,Vulnerable
|
3 |
+
2,Eagle,Aquila chrysaetos,Aves,Accipitriformes,Accipitridae,Mountains,Least Concern
|
4 |
+
3,Dolphin,Tursiops truncatus,Mammalia,Cetacea,Delphinidae,Ocean,Least Concern
|
5 |
+
4,Elephant,Loxodonta africana,Mammalia,Proboscidea,Elephantidae,Grassland,Vulnerable
|
6 |
+
5,Tiger,Panthera tigris,Mammalia,Carnivora,Felidae,Forest,Endangered
|
7 |
+
6,Penguin,Spheniscidae,Aves,Sphenisciformes,Spheniscidae,Antarctica,Least Concern
|
8 |
+
7,Giraffe,Giraffa camelopardalis,Mammalia,Artiodactyla,Giraffidae,Savanna,Vulnerable
|
9 |
+
8,Cheetah,Acinonyx jubatus,Mammalia,Carnivora,Felidae,Grassland,Vulnerable
|
10 |
+
9,Panda,Ailuropoda melanoleuca,Mammalia,Carnivora,Ursidae,Forest,Endangered
|
11 |
+
10,Kangaroo,Macropus rufus,Mammalia,Diprotodontia,Macropodidae,Grassland,Least Concern
|
data/z_employee.csv
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EmployeeID,FirstName,LastName,Email,Department,Salary
|
2 |
+
101,John,Smith,john.smith@example.com,Finance,60000
|
3 |
+
102,Emily,Johnson,emily.johnson@example.com,Marketing,55000
|
4 |
+
103,Michael,Williams,michael.williams@example.com,HR,50000
|
5 |
+
104,Susan,Anderson,susan.anderson@example.com,IT,65000
|
6 |
+
105,David,Martin,david.martin@example.com,Sales,58000
|
7 |
+
106,Linda,Davis,linda.davis@example.com,Finance,62000
|
8 |
+
107,William,Miller,william.miller@example.com,Marketing,56000
|
9 |
+
108,Sarah,Anderson,sarah.anderson@example.com,HR,51000
|
10 |
+
109,Robert,Clark,robert.clark@example.com,IT,67000
|
11 |
+
110,Karen,Wilson,karen.wilson@example.com,Sales,59000
|
12 |
+
111,James,Brown,james.brown@example.com,Finance,61000
|
13 |
+
112,Anna,Johnson,anna.johnson@example.com,Marketing,57000
|
14 |
+
113,Christopher,Moore,christopher.moore@example.com,HR,52000
|
15 |
+
114,Laura,White,laura.white@example.com,IT,68000
|
16 |
+
115,Mark,Davis,mark.davis@example.com,Sales,60000
|
17 |
+
116,Patricia,Jones,patricia.jones@example.com,Finance,63000
|
18 |
+
117,Matthew,Taylor,matthew.taylor@example.com,Marketing,58000
|
19 |
+
118,Jennifer,Young,jennifer.young@example.com,HR,53000
|
20 |
+
119,Steven,Anderson,steven.anderson@example.com,IT,69000
|
21 |
+
120,Elizabeth,Thomas,elizabeth.thomas@example.com,Sales,61000
|
22 |
+
121,Kevin,Harris,kevin.harris@example.com,Finance,64000
|
23 |
+
122,Deborah,Smith,deborah.smith@example.com,Marketing,59000
|
24 |
+
123,Joseph,Walker,joseph.walker@example.com,HR,54000
|
25 |
+
124,Cynthia,Jackson,cynthia.jackson@example.com,IT,70000
|
26 |
+
125,Daniel,Hall,daniel.hall@example.com,Sales,62000
|
data/z_house.csv
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PropertyID,StreetAddress,City,State,ZipCode,NumberOfBedrooms,NumberOfBathrooms,SquareFootage,Price
|
2 |
+
1,123 Main St,Los Angeles,CA,90001,3,2,1800,550000
|
3 |
+
2,456 Elm St,New York,NY,10001,2,1,1200,750000
|
4 |
+
3,789 Oak St,San Francisco,CA,94101,4,3,2500,950000
|
5 |
+
4,101 Maple St,Boston,MA,02101,3,2.5,2000,680000
|
6 |
+
5,202 Pine St,Miami,FL,33101,4,3.5,2700,820000
|
7 |
+
6,303 Cedar St,Chicago,IL,60601,2,1,1100,450000
|
utils/load_csv.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from utils.read_config import get_args
|
3 |
+
def check_csv(upload_file) -> pd.DataFrame:
    """Parse an uploaded CSV into a DataFrame.

    Args:
        upload_file: Anything ``pandas.read_csv`` accepts as its first
            argument (a path or a file-like object).

    Returns:
        The parsed contents, read with pandas' default CSV options.
    """
    return pd.read_csv(upload_file)
|
6 |
+
|
7 |
+
# Function to load sample of dataset
|
8 |
+
def load_sample(num_sample_records, sample_method, df: pd.DataFrame, col_name):
    """Return a sample of one column of ``df``.

    The requested size is capped by the ``"first_records"`` value obtained
    from ``get_args``; random sampling is seeded with the ``"random_seed"``
    value obtained the same way.

    Args:
        num_sample_records: Requested number of rows.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``.
        df: Source DataFrame.
        col_name: Label of the single column to keep.

    Returns:
        A DataFrame holding the sampled rows of ``col_name``. For the three
        known methods the original row labels are moved into a column by
        ``reset_index()``. For any other ``sample_method`` the whole
        single-column frame is returned without resetting the index.

    Raises:
        KeyError: If ``col_name`` is not a column of ``df``.
    """
    max_records = get_args("first_records")
    random_seed = get_args("random_seed")

    # Never hand back more rows than the configured ceiling.
    sample_size = min(num_sample_records, max_records)

    # Keep only required column
    column_df = df[[col_name]]

    if sample_method == "First":
        return column_df.head(sample_size).reset_index()

    if sample_method == "Last":
        # tail(0) yields no rows, whereas iloc[-0:] is iloc[0:] and would
        # select the entire frame.
        return column_df.tail(sample_size).reset_index()

    if sample_method == "Random":
        # Sampling without replacement raises ValueError when asked for more
        # rows than exist; cap at the frame length so a short dataset behaves
        # like "First"/"Last" and simply returns every available row.
        sample_size = min(sample_size, len(column_df))
        return column_df.sample(
            sample_size, random_state=random_seed
        ).reset_index()

    # Unrecognised method: return the unsampled column unchanged.
    return column_df
|