Add geosure data

This commit is contained in:
Andras Schmelczer 2026-02-07 13:22:57 +00:00
parent c715475351
commit c91561d7fe
4 changed files with 216 additions and 1 deletion

View file

@@ -41,6 +41,7 @@ def _build_wide(
noise_path: Path,
school_proximity_path: Path,
broadband_path: Path,
geosure_path: Path,
) -> pl.DataFrame:
"""Build the wide dataframe by joining epc_pp with all auxiliary data."""
wide = pl.scan_parquet(epc_pp_path)
@@ -136,6 +137,9 @@ def _build_wide(
)
wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left")
geosure = pl.scan_parquet(geosure_path)
wide = wide.join(geosure, on="postcode", how="left")
# Use built_form (Terraced, Semi-detached) when available, otherwise epc_property_type
wide = wide.with_columns(
pl.when(pl.col("pp_property_type").is_in(["Terraced", "Semi-Detached"]))
@@ -208,6 +212,14 @@ def _build_wide(
"max_download_speed": "Max available download speed (Mbps)",
"serious_crime_avg_yr": "Serious crime (avg/yr)",
"minor_crime_avg_yr": "Minor crime (avg/yr)",
"transaction_year": "Transaction year",
"environmental_risk": "Environmental risk",
"collapsible_deposits_risk": "Collapsible deposits risk",
"compressible_ground_risk": "Compressible ground risk",
"landslide_risk": "Landslide risk",
"running_sand_risk": "Running sand risk",
"shrink_swell_risk": "Shrink-swell risk",
"soluble_rocks_risk": "Soluble rocks risk",
}
)
)
@@ -276,6 +288,12 @@ def main():
required=True,
help="Broadband performance by output area parquet file",
)
parser.add_argument(
"--geosure",
type=Path,
required=True,
help="GeoSure ground stability parquet file",
)
parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path"
)
@@ -293,6 +311,7 @@ def main():
noise_path=args.noise,
school_proximity_path=args.school_proximity,
broadband_path=args.broadband,
geosure_path=args.geosure,
)
print(f"Columns: {wide.columns}")