This commit is contained in:
Andras Schmelczer 2026-03-15 21:22:28 +00:00
parent 479ef92236
commit c38d654ac7
44 changed files with 2526 additions and 701 deletions

View file

@ -94,11 +94,18 @@ def _build(
# Remap terminated postcodes to nearest active successor
postcode_mapping = build_postcode_mapping(arcgis_path)
wide = wide.join(
postcode_mapping.lazy(), left_on="postcode", right_on="old_postcode", how="left"
).with_columns(
pl.coalesce("new_postcode", "postcode").alias("postcode"),
).drop("new_postcode")
wide = (
wide.join(
postcode_mapping.lazy(),
left_on="postcode",
right_on="old_postcode",
how="left",
)
.with_columns(
pl.coalesce("new_postcode", "postcode").alias("postcode"),
)
.drop("new_postcode")
)
arcgis = (
pl.scan_parquet(arcgis_path)
@ -252,16 +259,18 @@ def _build(
.otherwise(pl.col("pp_property_type"))
# Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes",
# collapse terrace sub-types, and fold rare types into "Other"
.replace({
"Flat": "Flats/Maisonettes",
"Maisonette": "Flats/Maisonettes",
"End-Terrace": "Terraced",
"Mid-Terrace": "Terraced",
"Enclosed End-Terrace": "Terraced",
"Enclosed Mid-Terrace": "Terraced",
"Bungalow": "Other",
"Park home": "Other",
})
.replace(
{
"Flat": "Flats/Maisonettes",
"Maisonette": "Flats/Maisonettes",
"End-Terrace": "Terraced",
"Mid-Terrace": "Terraced",
"Enclosed End-Terrace": "Terraced",
"Enclosed Mid-Terrace": "Terraced",
"Bungalow": "Other",
"Park home": "Other",
}
)
.alias("property_type")
)
@ -426,10 +435,16 @@ def main():
help="Census 2021 population by LSOA parquet file",
)
parser.add_argument(
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path"
"--output-postcodes",
type=Path,
required=True,
help="Output postcode parquet file path",
)
parser.add_argument(
"--output-properties", type=Path, required=True, help="Output properties parquet file path"
"--output-properties",
type=Path,
required=True,
help="Output properties parquet file path",
)
args = parser.parse_args()