Fix data pipelines once and for all
This commit is contained in:
parent
08560476c5
commit
4012e4e047
46 changed files with 4508 additions and 855 deletions
|
|
@ -102,15 +102,11 @@ _AREA_COLUMNS = [
|
|||
# is postcode-grain: it belongs in the area output (one value per postcode,
|
||||
# covering property-less postcodes too) rather than duplicated per property.
|
||||
TREE_DENSITY_FEATURE,
|
||||
# Schools
|
||||
"Good+ primary schools within 5km",
|
||||
"Good+ secondary schools within 5km",
|
||||
"Good+ primary schools within 2km",
|
||||
"Good+ secondary schools within 2km",
|
||||
"Outstanding primary schools within 5km",
|
||||
"Outstanding secondary schools within 5km",
|
||||
"Outstanding primary schools within 2km",
|
||||
"Outstanding secondary schools within 2km",
|
||||
# Schools (modelled historical catchment areas covering the postcode)
|
||||
"Good+ primary school catchments",
|
||||
"Good+ secondary school catchments",
|
||||
"Outstanding primary school catchments",
|
||||
"Outstanding secondary school catchments",
|
||||
# Demographics
|
||||
"Median age",
|
||||
# Politics
|
||||
|
|
@ -172,14 +168,10 @@ _FINAL_RENAME_COLUMNS = {
|
|||
"latest_price": "Last known price",
|
||||
"number_habitable_rooms": "Number of bedrooms & living rooms",
|
||||
"noise_lden_db": "Noise (dB)",
|
||||
"good_primary_5km": "Good+ primary schools within 5km",
|
||||
"good_secondary_5km": "Good+ secondary schools within 5km",
|
||||
"good_primary_2km": "Good+ primary schools within 2km",
|
||||
"good_secondary_2km": "Good+ secondary schools within 2km",
|
||||
"outstanding_primary_5km": "Outstanding primary schools within 5km",
|
||||
"outstanding_secondary_5km": "Outstanding secondary schools within 5km",
|
||||
"outstanding_primary_2km": "Outstanding primary schools within 2km",
|
||||
"outstanding_secondary_2km": "Outstanding secondary schools within 2km",
|
||||
"good_primary_catchments": "Good+ primary school catchments",
|
||||
"good_secondary_catchments": "Good+ secondary school catchments",
|
||||
"outstanding_primary_catchments": "Outstanding primary school catchments",
|
||||
"outstanding_secondary_catchments": "Outstanding secondary school catchments",
|
||||
"max_download_speed": "Max available download speed (Mbps)",
|
||||
"serious_crime_avg_yr": "Serious crime (avg/yr)",
|
||||
"minor_crime_avg_yr": "Minor crime (avg/yr)",
|
||||
|
|
@ -874,7 +866,7 @@ def _join_area_side_tables(
|
|||
election: pl.LazyFrame,
|
||||
poi_counts: pl.LazyFrame,
|
||||
noise: pl.LazyFrame,
|
||||
school_proximity: pl.LazyFrame,
|
||||
school_catchments: pl.LazyFrame,
|
||||
conservation_areas: pl.LazyFrame,
|
||||
tree_density: pl.LazyFrame | None,
|
||||
broadband: pl.LazyFrame,
|
||||
|
|
@ -905,7 +897,7 @@ def _join_area_side_tables(
|
|||
base = base.join(election, on="pcon", how="left")
|
||||
base = base.join(poi_counts, on="postcode", how="left")
|
||||
base = base.join(noise, on="postcode", how="left")
|
||||
base = base.join(school_proximity, on="postcode", how="left")
|
||||
base = base.join(school_catchments, on="postcode", how="left")
|
||||
base = base.join(conservation_areas, on="postcode", how="left").with_columns(
|
||||
pl.col(CONSERVATION_AREA_FEATURE).fill_null("No")
|
||||
)
|
||||
|
|
@ -1970,7 +1962,7 @@ def _build(
|
|||
ethnicity_path: Path,
|
||||
crime_path: Path,
|
||||
noise_path: Path,
|
||||
school_proximity_path: Path,
|
||||
school_catchments_path: Path,
|
||||
broadband_path: Path,
|
||||
conservation_areas_path: Path,
|
||||
rental_prices_path: Path,
|
||||
|
|
@ -2080,7 +2072,7 @@ def _build(
|
|||
)
|
||||
.select("postcode", "noise_lden_db")
|
||||
)
|
||||
school_proximity = pl.scan_parquet(school_proximity_path)
|
||||
school_catchments = pl.scan_parquet(school_catchments_path)
|
||||
conservation_areas = _conservation_area_by_postcode(
|
||||
arcgis.select("postcode", "lat", "lon"), conservation_areas_path
|
||||
)
|
||||
|
|
@ -2120,7 +2112,7 @@ def _build(
|
|||
"election": election,
|
||||
"poi_counts": poi_counts,
|
||||
"noise": noise,
|
||||
"school_proximity": school_proximity,
|
||||
"school_catchments": school_catchments,
|
||||
"conservation_areas": conservation_areas,
|
||||
"tree_density": tree_density,
|
||||
"broadband": broadband,
|
||||
|
|
@ -2267,10 +2259,10 @@ def main():
|
|||
"--noise", type=Path, required=True, help="Road noise by postcode parquet file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--school-proximity",
|
||||
"--school-catchments",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="School proximity counts parquet file",
|
||||
help="School catchment counts parquet file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--broadband",
|
||||
|
|
@ -2376,7 +2368,7 @@ def main():
|
|||
ethnicity_path=args.ethnicity,
|
||||
crime_path=args.crime,
|
||||
noise_path=args.noise,
|
||||
school_proximity_path=args.school_proximity,
|
||||
school_catchments_path=args.school_catchments,
|
||||
broadband_path=args.broadband,
|
||||
conservation_areas_path=args.conservation_areas,
|
||||
rental_prices_path=args.rental_prices,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue