Add crime per 1k people

This commit is contained in:
Andras Schmelczer 2026-03-08 21:02:29 +00:00
parent 245c16a212
commit e0798b24f7
4 changed files with 136 additions and 2 deletions

View file

@ -43,6 +43,8 @@ _AREA_COLUMNS = [
"Other crime (avg/yr)",
"Serious crime (avg/yr)",
"Minor crime (avg/yr)",
"Serious crime per 1k residents (avg/yr)",
"Minor crime per 1k residents (avg/yr)",
# Amenities
"Number of restaurants within 2km",
"Number of grocery shops and supermarkets within 2km",
@ -77,6 +79,7 @@ def _build(
broadband_path: Path,
geosure_path: Path,
rental_prices_path: Path,
lsoa_population_path: Path,
) -> tuple[pl.DataFrame, pl.DataFrame]:
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
@ -171,6 +174,17 @@ def _build(
).alias("minor_crime_avg_yr"),
)
# Normalise the yearly crime averages by LSOA resident population so areas of
# different sizes are comparable (rate per 1,000 residents, rounded to 1 dp).
# NOTE(review): assumes the population parquet exposes `lsoa21` and
# `population` columns, as the join key and divisor below require — confirm.
lsoa_pop = pl.scan_parquet(lsoa_population_path)
wide = wide.join(lsoa_pop, on="lsoa21", how="left")
# Only divide where a positive population is known. A zero population would
# otherwise yield inf (or NaN for 0/0) in the output, and an LSOA missing from
# the population file (null after the left join) must stay null.
has_population = pl.col("population") > 0
wide = wide.with_columns(
    pl.when(has_population)
    .then((pl.col("serious_crime_avg_yr") / pl.col("population") * 1000).round(1))
    .otherwise(None)
    .alias("serious_crime_per_1k"),
    pl.when(has_population)
    .then((pl.col("minor_crime_avg_yr") / pl.col("population") * 1000).round(1))
    .otherwise(None)
    .alias("minor_crime_per_1k"),
).drop("population")  # population is only a divisor; keep it out of the output
poi_counts = pl.scan_parquet(poi_proximity_path)
wide = wide.join(poi_counts, on="postcode", how="left")
@ -301,6 +315,8 @@ def _build(
"max_download_speed": "Max available download speed (Mbps)",
"serious_crime_avg_yr": "Serious crime (avg/yr)",
"minor_crime_avg_yr": "Minor crime (avg/yr)",
"serious_crime_per_1k": "Serious crime per 1k residents (avg/yr)",
"minor_crime_per_1k": "Minor crime per 1k residents (avg/yr)",
"environmental_risk": "Environmental risk",
"collapsible_deposits_risk": "Collapsible deposits risk",
"compressible_ground_risk": "Compressible ground risk",
@ -389,6 +405,12 @@ def main():
required=True,
help="ONS rental prices by LA and bedroom count parquet file",
)
parser.add_argument(
"--lsoa-population",
type=Path,
required=True,
help="Census 2021 population by LSOA parquet file",
)
parser.add_argument(
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path"
)
@ -409,6 +431,7 @@ def main():
broadband_path=args.broadband,
geosure_path=args.geosure,
rental_prices_path=args.rental_prices,
lsoa_population_path=args.lsoa_population,
)
print(f"\nPostcode columns: {postcode_df.columns}")