idk
This commit is contained in:
parent
a04ac2d857
commit
d43da9708c
47 changed files with 4120 additions and 573 deletions
|
|
@@ -95,11 +95,14 @@ def transform_crime(
|
|||
f"({valid_months[0]} to {valid_months[-1]})"
|
||||
)
|
||||
|
||||
# Count monthly incidents, then annualise over every valid month in the dataset.
|
||||
# `_weight` (≤1) comes from the LSOA 2011→2021 lookup: 2011 LSOAs that split
|
||||
# into N 2021 LSOAs contribute 1/N of their count to each child, since we
|
||||
# don't know which child a given incident actually belonged to.
|
||||
yearly_counts = (
|
||||
# Annualise each year separately (count_in_year * 12 / months_in_year), then
|
||||
# take the simple mean of those per-year rates over the years each type is
|
||||
# present. This makes the headline equal the average of the by-year chart bars
|
||||
# (_write_crime_by_year) instead of a month-weighted pooled rate, mirroring
|
||||
# crime_spatial._write_avg_yr. `_weight` (≤1) comes from the LSOA 2011→2021
|
||||
# lookup: 2011 LSOAs that split into N 2021 LSOAs contribute 1/N of their count
|
||||
# to each child, since we don't know which child an incident actually belonged to.
|
||||
filtered = (
|
||||
df.filter(
|
||||
valid_month_expr
|
||||
& pl.col("LSOA code").is_not_null()
|
||||
|
|
@@ -107,15 +110,31 @@ def transform_crime(
|
|||
& pl.col("Crime type").is_not_null()
|
||||
& (pl.col("Crime type") != "")
|
||||
)
|
||||
.with_columns(pl.col("Crime type").replace(LEGACY_CRIME_TYPE_ALIASES))
|
||||
.group_by("LSOA code", "Month", "Crime type")
|
||||
.agg((pl.col("_weight").first() * pl.len()).alias("count"))
|
||||
.group_by("LSOA code", "Crime type")
|
||||
.agg(
|
||||
(pl.col("count").sum() / pl.lit(valid_month_count) * 12)
|
||||
.round(1)
|
||||
.alias("yearly_avg")
|
||||
.with_columns(
|
||||
pl.col("Month").str.slice(0, 4).cast(pl.Int32).alias("year"),
|
||||
pl.col("Crime type").replace(LEGACY_CRIME_TYPE_ALIASES),
|
||||
)
|
||||
)
|
||||
|
||||
# Months observed *anywhere* in the dataset for each year (annualisation
|
||||
# denominator), matching the by-year output's per-year scaling.
|
||||
months_per_year = filtered.group_by("year").agg(
|
||||
pl.col("Month").n_unique().alias("months_in_year")
|
||||
)
|
||||
|
||||
yearly_counts = (
|
||||
filtered.group_by("LSOA code", "year", "Crime type", "Month")
|
||||
.agg((pl.col("_weight").first() * pl.len()).alias("count"))
|
||||
.group_by("LSOA code", "year", "Crime type")
|
||||
.agg(pl.col("count").sum().alias("count"))
|
||||
.join(months_per_year, on="year")
|
||||
.with_columns(
|
||||
(pl.col("count") * 12.0 / pl.col("months_in_year")).alias("per_year")
|
||||
)
|
||||
# Mean of the per-year annualised rates over the years the type is present
|
||||
# (only years with rows are grouped here, so the mean divides by the number of years in which the type actually appears).
|
||||
.group_by("LSOA code", "Crime type")
|
||||
.agg(pl.col("per_year").mean().round(1).alias("yearly_avg"))
|
||||
.collect(engine="streaming")
|
||||
)
|
||||
if yearly_counts.is_empty():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue