diff --git a/pipeline/transform/merge.py b/pipeline/transform/merge.py index 023be43..5ef6837 100644 --- a/pipeline/transform/merge.py +++ b/pipeline/transform/merge.py @@ -136,15 +136,12 @@ def _build_wide( ) wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left") + # Use built_form (Terraced, Semi-detached) when available, otherwise epc_property_type wide = wide.with_columns( - pl.when(pl.col("pp_property_type") == pl.col("built_form")) - .then(pl.col("pp_property_type")) - .otherwise( - pl.concat_str( - [pl.col("pp_property_type"), pl.lit("/"), pl.col("built_form")] - ) - ) - .alias("property_type_built_form") + pl.when(pl.col("pp_property_type").is_in(["Terraced", "Semi-Detached"])) + .then(pl.col("built_form")) + .otherwise(pl.col("epc_property_type")) + .alias("epc_property_type") ) wide = ( @@ -167,7 +164,6 @@ def _build_wide( .alias("Price per sqm"), ) .drop( - "date_of_transfer", "inspection_date", "floor_height", "LSOA name (2021)", @@ -177,6 +173,8 @@ def _build_wide( "Geographical Barriers Sub-domain Score", "Adult Skills Sub-domain Score", "Children and Young People Sub-domain Score", + "Crime Score", + "Index of Multiple Deprivation (IMD) Score", "Income Deprivation Affecting Older People (IDAOPI) Score (rate)", "Income Deprivation Affecting Children Index (IDACI) Score (rate)", "Barriers to Housing and Services Score", @@ -187,7 +185,8 @@ def _build_wide( ) .rename( { - "construction_age_band": "Approximate construction age", + "date_of_transfer": "Previous transaction date", + "construction_age_band": "Construction age", "is_construction_date_approximate": "Is construction date approximate", "pp_address": "Address per Property Register", "epc_address": "Address per EPC", @@ -197,7 +196,6 @@ def _build_wide( "potential_energy_rating": "Potential energy rating", "total_floor_area": "Total floor area (sqm)", "epc_property_type": "Property type", - "property_type_built_form": "Property type/built form", "restaurants_2km": "Restaurants within 2km", "groceries_2km": "Groceries within 2km", "parks_2km": "Parks within 2km",