Can't even keep track anymore
This commit is contained in:
parent
dccc1e439d
commit
3a3f899ea2
50 changed files with 1144 additions and 560 deletions
|
|
@ -42,6 +42,7 @@ def _build_wide(
|
|||
school_proximity_path: Path,
|
||||
broadband_path: Path,
|
||||
geosure_path: Path,
|
||||
rental_prices_path: Path,
|
||||
) -> pl.DataFrame:
|
||||
"""Build the wide dataframe by joining epc_pp with all auxiliary data."""
|
||||
wide = (
|
||||
|
|
@ -94,6 +95,21 @@ def _build_wide(
|
|||
how="left",
|
||||
)
|
||||
|
||||
# Derive bedroom count: habitable rooms - 1 (assuming 1 reception room), clipped to 0..4
|
||||
wide = wide.with_columns(
|
||||
(pl.col("number_habitable_rooms") - 1)
|
||||
.clip(0, 4)
|
||||
.cast(pl.UInt8)
|
||||
.alias("_bedrooms"),
|
||||
)
|
||||
rental = pl.scan_parquet(rental_prices_path)
|
||||
wide = wide.join(
|
||||
rental,
|
||||
left_on=["Local Authority District code (2024)", "_bedrooms"],
|
||||
right_on=["area_code", "bedrooms"],
|
||||
how="left",
|
||||
)
|
||||
|
||||
crime = pl.scan_parquet(crime_path)
|
||||
wide = wide.join(crime, left_on="lsoa21", right_on="LSOA code", how="left")
|
||||
|
||||
|
|
@ -208,6 +224,7 @@ def _build_wide(
|
|||
.drop(
|
||||
"inspection_date",
|
||||
"floor_height",
|
||||
"_bedrooms",
|
||||
"LSOA name (2021)",
|
||||
"Local Authority District code (2024)",
|
||||
"Local Authority District name (2024)",
|
||||
|
|
@ -258,6 +275,7 @@ def _build_wide(
|
|||
"running_sand_risk": "Running sand risk",
|
||||
"shrink_swell_risk": "Shrink-swell risk",
|
||||
"soluble_rocks_risk": "Soluble rocks risk",
|
||||
"median_monthly_rent": "Estimated monthly rent",
|
||||
}
|
||||
)
|
||||
)
|
||||
|
|
@ -332,6 +350,12 @@ def main():
|
|||
required=True,
|
||||
help="GeoSure ground stability parquet file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rental-prices",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="ONS rental prices by LA and bedroom count parquet file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output parquet file path"
|
||||
)
|
||||
|
|
@ -350,6 +374,7 @@ def main():
|
|||
school_proximity_path=args.school_proximity,
|
||||
broadband_path=args.broadband,
|
||||
geosure_path=args.geosure,
|
||||
rental_prices_path=args.rental_prices,
|
||||
)
|
||||
|
||||
print(f"Columns: {wide.columns}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue