lmao
This commit is contained in:
parent
03445188ea
commit
524580eb25
102 changed files with 36625 additions and 1295 deletions
|
|
@ -3,7 +3,8 @@ import argparse
|
|||
import polars as pl
|
||||
from pathlib import Path
|
||||
|
||||
MIN_PRICE = 10_000
|
||||
from pipeline.utils.postcode_mapping import build_postcode_mapping
|
||||
|
||||
MIN_FLOOR_AREA_M2 = 10
|
||||
|
||||
|
||||
|
|
@ -45,20 +46,23 @@ def _build_wide(
|
|||
rental_prices_path: Path,
|
||||
) -> pl.DataFrame:
|
||||
"""Build the wide dataframe by joining epc_pp with all auxiliary data."""
|
||||
wide = (
|
||||
pl.scan_parquet(epc_pp_path)
|
||||
.filter(
|
||||
pl.col("total_floor_area").is_null()
|
||||
| (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
|
||||
)
|
||||
.filter(
|
||||
pl.col("latest_price").is_null() | (pl.col("latest_price") >= MIN_PRICE)
|
||||
)
|
||||
wide = pl.scan_parquet(epc_pp_path).filter(
|
||||
pl.col("total_floor_area").is_null()
|
||||
| (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
|
||||
)
|
||||
|
||||
# Remap terminated postcodes to nearest active successor
|
||||
postcode_mapping = build_postcode_mapping(arcgis_path)
|
||||
wide = wide.join(
|
||||
postcode_mapping.lazy(), left_on="postcode", right_on="old_postcode", how="left"
|
||||
).with_columns(
|
||||
pl.coalesce("new_postcode", "postcode").alias("postcode"),
|
||||
).drop("new_postcode")
|
||||
|
||||
arcgis = (
|
||||
pl.scan_parquet(arcgis_path)
|
||||
.filter(pl.col("ctry") == "E92000001") # England only
|
||||
.filter(pl.col("doterm").is_null()) # Active postcodes only
|
||||
.select(
|
||||
pl.col("pcds").alias("postcode"),
|
||||
"lat",
|
||||
|
|
@ -67,7 +71,7 @@ def _build_wide(
|
|||
"oa21",
|
||||
)
|
||||
)
|
||||
wide = wide.join(arcgis, on="postcode", how="full", coalesce=True)
|
||||
wide = wide.join(arcgis, on="postcode", how="left")
|
||||
|
||||
wide = _join_journey_times(wide, journey_times_bank_path, "Bank")
|
||||
wide = _join_journey_times(wide, journey_times_fitzrovia_path, "Fitzrovia")
|
||||
|
|
@ -147,11 +151,6 @@ def _build_wide(
|
|||
.with_columns(
|
||||
pl.max_horizontal(*noise_cols).alias("noise_lden_db"),
|
||||
)
|
||||
.with_columns(
|
||||
pl.col("noise_lden_db")
|
||||
.fill_null(pl.col("noise_lden_db").min())
|
||||
.alias("noise_lden_db"),
|
||||
)
|
||||
.select("postcode", "noise_lden_db")
|
||||
)
|
||||
wide = wide.join(noise, on="postcode", how="left")
|
||||
|
|
@ -181,7 +180,7 @@ def _build_wide(
|
|||
.group_by("bb_postcode")
|
||||
.agg(pl.col("max_download_speed").max())
|
||||
)
|
||||
wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left")
|
||||
wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left").drop("bb_postcode")
|
||||
|
||||
geosure = pl.scan_parquet(geosure_path)
|
||||
wide = wide.join(geosure, on="postcode", how="left")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue