Spaces:
Sleeping
Sleeping
Briefly mention other important methods that change their behaviour based on missing values
Browse files- polars/11_missing_data.py +77 -1
polars/11_missing_data.py
CHANGED
|
@@ -203,7 +203,9 @@ def _(mo):
|
|
| 203 |
- if you don't want to propagate null values, use `_missing` variations of methods such as `eq` vs `eq_missing`
|
| 204 |
- you may want to fill in missing values based on calculations via `fill_null`, or manually edit the data based on external documents
|
| 205 |
|
| 206 |
-
|
|
|
|
|
|
|
| 207 |
"""
|
| 208 |
)
|
| 209 |
return
|
|
@@ -692,6 +694,80 @@ def _(day_perc, mo, perc_col):
|
|
| 692 |
return
|
| 693 |
|
| 694 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
@app.cell(hide_code=True)
|
| 696 |
def _(mo):
|
| 697 |
mo.md(
|
|
|
|
| 203 |
- if you don't want to propagate null values, use `_missing` variations of methods such as `eq` vs `eq_missing`
|
| 204 |
- you may want to fill in missing values based on calculations via `fill_null`, or manually edit the data based on external documents
|
| 205 |
|
| 206 |
+
You can also refer to the polars [User Guide](https://docs.pola.rs/user-guide/expressions/missing-data/) for more information.
|
| 207 |
+
|
| 208 |
+
Whichever approach you take, remember to document how you handled it!
|
| 209 |
"""
|
| 210 |
)
|
| 211 |
return
|
|
|
|
| 694 |
return
|
| 695 |
|
| 696 |
|
| 697 |
+
@app.cell(hide_code=True)
|
| 698 |
+
def _(mo):
|
| 699 |
+
mo.md(
|
| 700 |
+
r"""
|
| 701 |
+
## Appendix C: Everything else
|
| 702 |
+
|
| 703 |
+
As long as this Notebook is, it cannot reasonably cover ***everything*** that may have to deal with missing values, as that is literally everything that may have to deal with data.
|
| 704 |
+
|
| 705 |
+
This section very briefly covers some other features not mentioned above.
|
| 706 |
+
"""
|
| 707 |
+
)
|
| 708 |
+
return
|
| 709 |
+
|
| 710 |
+
|
| 711 |
+
@app.cell(hide_code=True)
|
| 712 |
+
def _(mo):
|
| 713 |
+
mo.md(
|
| 714 |
+
r"""
|
| 715 |
+
### Missing values in Aggregations
|
| 716 |
+
|
| 717 |
+
Many aggregation methods will ignore/skip missing values, while others take them into consideration.
|
| 718 |
+
|
| 719 |
+
Always check the documentation of the method you're using; much of the time, the docstring will explain its behaviour.
|
| 720 |
+
"""
|
| 721 |
+
)
|
| 722 |
+
return
|
| 723 |
+
|
| 724 |
+
|
| 725 |
+
@app.cell
|
| 726 |
+
def _(df, pl):
|
| 727 |
+
df.group_by("species").agg(
|
| 728 |
+
pl.col("height").len().alias("len"),
|
| 729 |
+
pl.col("height").count().alias("count"),
|
| 730 |
+
)
|
| 731 |
+
return
|
| 732 |
+
|
| 733 |
+
|
| 734 |
+
@app.cell(hide_code=True)
|
| 735 |
+
def _(mo):
|
| 736 |
+
mo.md(
|
| 737 |
+
r"""
|
| 738 |
+
### Missing values in Joins
|
| 739 |
+
|
| 740 |
+
By default, null values will never produce matches using [join](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join.html), but you can specify `nulls_equal=True` to join null values with each other.
|
| 741 |
+
"""
|
| 742 |
+
)
|
| 743 |
+
return
|
| 744 |
+
|
| 745 |
+
|
| 746 |
+
@app.cell(hide_code=True)
|
| 747 |
+
def _(pl):
|
| 748 |
+
age_groups = pl.DataFrame([
|
| 749 |
+
{"age": None, "stage": "Unknown"},
|
| 750 |
+
{"age": [0, 1], "stage": "Baby"},
|
| 751 |
+
{"age": [2, 3, 4, 5, 6, 7, 8, 9, 10], "stage": "Adult"},
|
| 752 |
+
{"age": [11, 12, 13, 14], "stage": "Senior"},
|
| 753 |
+
{"age": [15, 16, 17, 18, 19, 20], "stage": "Geriatric"},
|
| 754 |
+
])
|
| 755 |
+
age_groups
|
| 756 |
+
return (age_groups,)
|
| 757 |
+
|
| 758 |
+
|
| 759 |
+
@app.cell
|
| 760 |
+
def _(age_groups, df):
|
| 761 |
+
df.join(age_groups.explode("age"), on="age")
|
| 762 |
+
return
|
| 763 |
+
|
| 764 |
+
|
| 765 |
+
@app.cell
|
| 766 |
+
def _(age_groups, df):
|
| 767 |
+
df.join(age_groups.explode("age"), on="age", nulls_equal=True)
|
| 768 |
+
return
|
| 769 |
+
|
| 770 |
+
|
| 771 |
@app.cell(hide_code=True)
|
| 772 |
def _(mo):
|
| 773 |
mo.md(
|