Fun changes
Some checks failed
CI / Python (lint + test) (push) Failing after 3m38s
CI / Rust (lint + test) (push) Failing after 3m32s
CI / Frontend (lint + typecheck) (push) Failing after 4m12s
Build and publish Docker image / build-and-push (push) Failing after 4m48s

This commit is contained in:
Andras Schmelczer 2026-04-04 22:59:44 +01:00
parent cd778dd088
commit 349a6c1d53
60 changed files with 1260 additions and 2600 deletions

View file

@ -49,7 +49,7 @@ _AREA_COLUMNS = [
# Amenities
"Number of restaurants within 2km",
"Number of grocery shops and supermarkets within 2km",
-"Number of parks within 2km",
+"Number of parks within 1km",
"Distance to nearest train or tube station (km)",
"Distance to nearest park (km)",
# Environment
@ -62,6 +62,16 @@ _AREA_COLUMNS = [
"Good+ secondary schools within 2km",
# Demographics
"Median age",
# Politics
"Winning party",
"Voter turnout (%)",
"Majority (%)",
"% Labour",
"% Conservative",
"% Liberal Democrat",
"% Reform UK",
"% Green",
"% Other parties",
]
@ -78,6 +88,7 @@ def _build(
rental_prices_path: Path,
lsoa_population_path: Path,
median_age_path: Path,
election_results_path: Path,
) -> tuple[pl.DataFrame, pl.DataFrame]:
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
@ -113,6 +124,7 @@ def _build(
pl.col("long").alias("lon"),
"lsoa21",
"oa21",
"pcon",
)
)
wide = wide.join(arcgis, on="postcode", how="left")
@ -193,6 +205,9 @@ def _build(
median_age = pl.scan_parquet(median_age_path)
wide = wide.join(median_age, on="lsoa21", how="left")
election = pl.scan_parquet(election_results_path)
wide = wide.join(election, on="pcon", how="left")
poi_counts = pl.scan_parquet(poi_proximity_path)
wide = wide.join(poi_counts, on="postcode", how="left")
@ -304,6 +319,7 @@ def _build(
"Barriers to Housing and Services Score",
"lsoa21",
"oa21",
"pcon",
"epc_property_type",
"pp_property_type",
"built_form",
@@ -323,7 +339,7 @@ def _build(
"property_type": "Property type",
"restaurants_2km": "Number of restaurants within 2km",
"groceries_2km": "Number of grocery shops and supermarkets within 2km",
-"parks_2km": "Number of parks within 2km",
+"parks_1km": "Number of parks within 1km",
"train_tube_nearest_km": "Distance to nearest train or tube station (km)",
"parks_nearest_km": "Distance to nearest park (km)",
"latest_price": "Last known price",
@ -342,6 +358,9 @@ def _build(
"floor_height": "Interior height (m)",
"was_council_house": "Former council house",
"median_age": "Median age",
"winning_party": "Winning party",
"turnout_pct": "Voter turnout (%)",
"majority_pct": "Majority (%)",
}
)
)
@ -427,6 +446,12 @@ def main():
required=True,
help="Census 2021 median age by LSOA parquet file",
)
parser.add_argument(
"--election-results",
type=Path,
required=True,
help="2024 General Election results by constituency parquet file",
)
parser.add_argument(
"--output-postcodes",
type=Path,
@ -454,6 +479,7 @@ def main():
rental_prices_path=args.rental_prices,
lsoa_population_path=args.lsoa_population,
median_age_path=args.median_age,
election_results_path=args.election_results,
)
print(f"\nPostcode columns: {postcode_df.columns}")

View file

@ -17,7 +17,7 @@ POI_GROUPS_2KM = {
# Groups for which to compute distance to nearest POI (from filtered POIs)
DISTANCE_GROUPS = {
-"train_tube": ["Metro or Tram stop", "Rail station"],
+"train_tube": ["Tube station", "Rail station"],
}
# OS Open Greenspace function types used for park counts and distance calculation.
@ -67,8 +67,8 @@ def main():
# Park counts and distances from OS Open Greenspace
greenspace = pl.read_parquet(args.greenspace)
-park_counts_2km = count_pois_per_postcode(
-postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS, radius_km=2
+park_counts_1km = count_pois_per_postcode(
+postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS, radius_km=1
)
park_distances = min_distance_per_postcode(
postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS
@ -77,7 +77,7 @@ def main():
# Join all results on postcode
result = (
counts_2km.join(distances, on="postcode")
-.join(park_counts_2km, on="postcode")
+.join(park_counts_1km, on="postcode")
.join(park_distances, on="postcode")
)

View file

@@ -1054,7 +1054,7 @@ NAPTAN_EMOJIS: dict[str, str] = {
"Bus stop": "🚏",
"Bus station": "🚌",
"Taxi rank": "🚕",
-"Metro or Tram stop": "🚊",
+"Tube station": "🚇",
}