Split up
This commit is contained in:
parent
cf39ad754e
commit
f59d01227b
91 changed files with 10370 additions and 7562 deletions
|
|
@ -123,10 +123,13 @@ def transform_crime(
|
|||
)
|
||||
|
||||
yearly_counts = (
|
||||
filtered.group_by("LSOA code", "year", "Crime type", "Month")
|
||||
.agg((pl.col("_weight").first() * pl.len()).alias("count"))
|
||||
.group_by("LSOA code", "year", "Crime type")
|
||||
.agg(pl.col("count").sum().alias("count"))
|
||||
# Sum per-incident weights directly: a 2021 LSOA can receive incidents
|
||||
# carrying different `_weight`s in the same month (split 2011 parent at
|
||||
# 1/N alongside an unsplit one at 1), so `_weight.first() * len` would
|
||||
# apply one row's weight to all of them — and nondeterministically so,
|
||||
# since `first` after a join has no ordering guarantee.
|
||||
filtered.group_by("LSOA code", "year", "Crime type")
|
||||
.agg(pl.col("_weight").sum().alias("count"))
|
||||
.join(months_per_year, on="year")
|
||||
.with_columns(
|
||||
(pl.col("count") * 12.0 / pl.col("months_in_year")).alias("per_year")
|
||||
|
|
@ -191,10 +194,10 @@ def _write_crime_by_year(
|
|||
)
|
||||
|
||||
yearly_per_type = (
|
||||
filtered.group_by("LSOA code", "Crime type", "year", "Month")
|
||||
.agg((pl.col("_weight").first() * pl.len()).alias("count"))
|
||||
.group_by("LSOA code", "Crime type", "year")
|
||||
.agg(pl.col("count").sum().alias("count"))
|
||||
# Per-incident weight sum, not `_weight.first() * len` — see the
|
||||
# matching comment in transform_crime.
|
||||
filtered.group_by("LSOA code", "Crime type", "year")
|
||||
.agg(pl.col("_weight").sum().alias("count"))
|
||||
.join(months_per_year, on="year")
|
||||
.with_columns(
|
||||
(pl.col("count").cast(pl.Float32) * 12.0 / pl.col("months_in_year"))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue