etrotta committed on
Commit
60ca44f
·
1 Parent(s): be882e1

scan_csv first to save a little memory

Browse files
Files changed (1) hide show
  1. polars/11_missing_data.py +4 -4
polars/11_missing_data.py CHANGED
@@ -784,8 +784,8 @@ def _(mo):
784
 
785
  @app.cell
786
  def _(pl):
787
- raw_stations = pl.read_csv("hf://datasets/etrotta/weather-alertario/datario_alertario_stations.csv")
788
- raw_weather = pl.read_csv("hf://datasets/etrotta/weather-alertario/datario_alertario_weather_2020_to_2022.csv")
789
  return raw_stations, raw_weather
790
 
791
 
@@ -801,7 +801,7 @@ def _(pl, raw_stations):
801
  pl.col("endereco").alias("address"),
802
  pl.col("data_inicio_operacao").alias("operation_start_date"),
803
  pl.col("data_fim_operacao").alias("operation_end_date"),
804
- )
805
  return (dirty_stations,)
806
 
807
 
@@ -811,7 +811,7 @@ def _(pl, raw_weather):
811
  pl.col("id_estacao").alias("station"),
812
  pl.col("acumulado_chuva_15_min").alias("accumulated_rain_15_minutes"),
813
  pl.concat_str("data_particao", pl.lit("T"), "horario").str.to_datetime(time_zone=None).alias("datetime"),
814
- )
815
  return (dirty_weather_naive,)
816
 
817
 
 
784
 
785
  @app.cell
786
  def _(pl):
787
+ raw_stations = pl.scan_csv("hf://datasets/etrotta/weather-alertario/datario_alertario_stations.csv")
788
+ raw_weather = pl.scan_csv("hf://datasets/etrotta/weather-alertario/datario_alertario_weather_2020_to_2022.csv")
789
  return raw_stations, raw_weather
790
 
791
 
 
801
  pl.col("endereco").alias("address"),
802
  pl.col("data_inicio_operacao").alias("operation_start_date"),
803
  pl.col("data_fim_operacao").alias("operation_end_date"),
804
+ ).collect()
805
  return (dirty_stations,)
806
 
807
 
 
811
  pl.col("id_estacao").alias("station"),
812
  pl.col("acumulado_chuva_15_min").alias("accumulated_rain_15_minutes"),
813
  pl.concat_str("data_particao", pl.lit("T"), "horario").str.to_datetime(time_zone=None).alias("datetime"),
814
+ ).collect()
815
  return (dirty_weather_naive,)
816
 
817