vibes
This commit is contained in:
parent
80c093b7ba
commit
f72c43a9fa
101 changed files with 2168 additions and 1177 deletions
|
|
@ -13,13 +13,13 @@ import polars as pl
|
|||
from shapely.geometry import Point
|
||||
from tqdm import tqdm
|
||||
|
||||
from pipeline.download.pois import (
|
||||
from pipeline.utils.england_geometry import (
|
||||
ENGLAND_BBOX_EAST,
|
||||
ENGLAND_BBOX_NORTH,
|
||||
ENGLAND_BBOX_SOUTH,
|
||||
ENGLAND_BBOX_WEST,
|
||||
load_england_polygon,
|
||||
)
|
||||
from pipeline.utils.england_geometry import load_england_polygon
|
||||
|
||||
PLACE_TYPES = {"city"}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,42 @@ BEDROOM_SHEETS = {
|
|||
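# Local authority district code prefixes in England (E06 unitary authorities, E07 non-metropolitan districts, E08 metropolitan districts, E09 London boroughs); see https://en.wikipedia.org/wiki/ONS_coding_system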
|
||||
LA_PREFIXES = ("E06", "E07", "E08", "E09")
|
||||
|
||||
# April 2021 + April 2023 LA reorganizations: old district codes → new unitary authority codes.
|
||||
# The ONS rental data (Oct 2022 – Sep 2023) uses the old codes; IoD 2025 uses the new ones.
|
||||
# We remap old → new and take the unweighted mean of the merged districts' medians (an approximation, not a true median for the new authority) so the join in merge.py works.
|
||||
LA_CONSOLIDATION = {
|
||||
# North Northamptonshire (April 2021)
|
||||
"E07000150": "E06000061", # Corby
|
||||
"E07000152": "E06000061", # East Northamptonshire
|
||||
"E07000153": "E06000061", # Kettering
|
||||
"E07000156": "E06000061", # Wellingborough
|
||||
# West Northamptonshire (April 2021)
|
||||
"E07000151": "E06000062", # Daventry
|
||||
"E07000154": "E06000062", # Northampton
|
||||
"E07000155": "E06000062", # South Northamptonshire
|
||||
# Cumberland (April 2023)
|
||||
"E07000026": "E06000063", # Allerdale
|
||||
"E07000028": "E06000063", # Carlisle
|
||||
"E07000029": "E06000063", # Copeland
|
||||
# Westmorland and Furness (April 2023)
|
||||
"E07000027": "E06000064", # Barrow-in-Furness
|
||||
"E07000030": "E06000064", # Eden
|
||||
"E07000031": "E06000064", # South Lakeland
|
||||
# North Yorkshire (April 2023)
|
||||
"E07000163": "E06000065", # Craven
|
||||
"E07000164": "E06000065", # Hambleton
|
||||
"E07000165": "E06000065", # Harrogate
|
||||
"E07000166": "E06000065", # Richmondshire
|
||||
"E07000167": "E06000065", # Ryedale
|
||||
"E07000168": "E06000065", # Scarborough
|
||||
"E07000169": "E06000065", # Selby
|
||||
# Somerset (April 2023)
|
||||
"E07000187": "E06000066", # Mendip
|
||||
"E07000188": "E06000066", # Sedgemoor
|
||||
"E07000189": "E06000066", # South Somerset
|
||||
"E07000246": "E06000066", # Somerset West and Taunton
|
||||
}
|
||||
|
||||
|
||||
def _read_sheet(xls_path: Path, sheet_id: int, bedrooms: int) -> pl.DataFrame:
|
||||
"""Read one bedroom category sheet, extract LA-level median rents."""
|
||||
|
|
@ -61,6 +97,14 @@ def convert_to_parquet(xls_path: Path, parquet_path: Path) -> None:
|
|||
frames.append(df)
|
||||
|
||||
combined = pl.concat(frames)
|
||||
|
||||
# Remap old LA codes to new unitary authority codes, then collapse the merged districts to one row per (area_code, bedrooms) using the unweighted mean of their medians
|
||||
combined = combined.with_columns(
|
||||
pl.col("area_code").replace(LA_CONSOLIDATION),
|
||||
).group_by("area_code", "bedrooms").agg(
|
||||
pl.col("median_monthly_rent").mean(),
|
||||
)
|
||||
|
||||
print(f"Combined: {combined.shape}")
|
||||
print(f"Non-null medians: {combined['median_monthly_rent'].drop_nulls().len()}")
|
||||
print(combined.head(10))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue