good
This commit is contained in:
parent
c995f12f8b
commit
8dc939d761
44 changed files with 3540 additions and 2159478 deletions
|
|
@ -14,6 +14,7 @@ FINDER_DATA := ./finder/data
|
|||
|
||||
TILES := $(DATA_DIR)/uk.pmtiles
|
||||
SATELLITE_TILES := $(DATA_DIR)/satellite.pmtiles
|
||||
SATELLITE_HIGHRES_TILES := $(DATA_DIR)/satellite_highres.pmtiles
|
||||
ARCGIS := $(DATA_DIR)/arcgis_data.parquet
|
||||
PRICE_PAID := $(DATA_DIR)/price-paid-complete.parquet
|
||||
IOD := $(DATA_DIR)/IoD2025_Scores.parquet
|
||||
|
|
@ -33,14 +34,14 @@ ACTUAL_LISTINGS_RAW := $(FINDER_DATA)/online_listings_buy.parquet
|
|||
ACTUAL_LISTINGS_ENRICHED := $(FINDER_DATA)/online_listings_buy_enriched.parquet
|
||||
ETHNICITY := $(DATA_DIR)/ethnicity_by_la.parquet
|
||||
CRIME_DIR := $(DATA_DIR)/crime
|
||||
CRIME := $(DATA_DIR)/crime_by_lsoa.parquet
|
||||
CRIME_BY_YEAR := $(DATA_DIR)/crime_by_year_by_lsoa.parquet
|
||||
CRIME := $(DATA_DIR)/crime_by_postcode.parquet
|
||||
CRIME_BY_YEAR := $(DATA_DIR)/crime_by_postcode_by_year.parquet
|
||||
CRIME_STAMP := $(CRIME_DIR)/.downloaded
|
||||
LSOA_LOOKUP := $(DATA_DIR)/lsoa_2011_to_2021.parquet
|
||||
NOISE := $(DATA_DIR)/road_noise.parquet
|
||||
NOISE_OVERLAY_TILES := $(DATA_DIR)/noise_lden_10m.pmtiles
|
||||
CRIME_HOTSPOT_TILES := $(DATA_DIR)/crime_hotspots.pmtiles
|
||||
TREE_OVERLAY_TILES := $(DATA_DIR)/trees_outside_woodlands.pmtiles
|
||||
PROPERTY_BORDER_TILES := $(DATA_DIR)/property_borders.pmtiles
|
||||
OFSTED := $(DATA_DIR)/ofsted.parquet
|
||||
GIAS := $(DATA_DIR)/gias.parquet
|
||||
NAPTAN := $(DATA_DIR)/naptan.parquet
|
||||
|
|
@ -61,12 +62,12 @@ GREENSPACE := $(DATA_DIR)/greenspace_water.parquet
|
|||
OS_GREENSPACE := $(DATA_DIR)/os_greenspace.parquet
|
||||
PBF := $(DATA_DIR)/england-latest.osm.pbf
|
||||
FR_TOW := $(DATA_DIR)/FR_TOW_V1_ALL.zip
|
||||
NFI := $(DATA_DIR)/NFI_WOODLAND_ENGLAND.zip
|
||||
TREE_DENSITY_PC := $(DATA_DIR)/tree_density_by_postcode.parquet
|
||||
TREE_DENSITY_STREETS := $(DATA_DIR)/tree_density_by_street.parquet
|
||||
TREE_DENSITY_ADDR := $(DATA_DIR)/tree_density_by_address.parquet
|
||||
OFS_REGISTER := $(DATA_DIR)/ofs_register.xlsx
|
||||
PLACES := $(DATA_DIR)/places.parquet
|
||||
LSOA_POP := $(DATA_DIR)/lsoa_population.parquet
|
||||
MEDIAN_AGE := $(DATA_DIR)/median_age.parquet
|
||||
ELECTION := $(DATA_DIR)/election_results.parquet
|
||||
ENGLAND_BOUNDARY := $(DATA_DIR)/england_boundary.geojson
|
||||
|
|
@ -80,6 +81,8 @@ MAP_ASSETS_STAMP := $(MAP_ASSETS_DIR)/.done
|
|||
PMTILES_VERSION := 1.22.3
|
||||
PMTILES_BIN := $(DATA_DIR)/pmtiles
|
||||
SATELLITE_TILE_ARGS ?=
|
||||
SATELLITE_HIGHRES_ARGS ?=
|
||||
GDAL_ECW_IMAGE ?= perfect-postcode/gdal-ecw:latest
|
||||
|
||||
VALIDATE_OUTPUTS := uv run python -m pipeline.validate_outputs
|
||||
|
||||
|
|
@ -104,27 +107,29 @@ MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_
|
|||
|
||||
# ── Phony aliases ─────────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: prepare merge tiles satellite-tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles \
|
||||
.PHONY: prepare merge tiles satellite-tiles satellite-highres-tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles property-border-tiles \
|
||||
download-arcgis download-price-paid download-deprivation download-ethnicity \
|
||||
download-naptan download-pois download-grocery-retail-points download-ofsted download-gias download-broadband download-conservation-areas download-listed-buildings download-rental-prices \
|
||||
download-postcodes download-noise download-inspire download-crime \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-fr-tow download-ofs-register download-places download-lsoa-population download-median-age download-england-boundary download-rightmove-outcodes \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-fr-tow download-nfi download-ofs-register download-places download-median-age download-england-boundary download-rightmove-outcodes \
|
||||
download-map-assets \
|
||||
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
|
||||
transform-school-proximity transform-tree-density \
|
||||
generate-postcode-boundaries generate-travel-times enrich-actual-listings
|
||||
|
||||
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
|
||||
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX)
|
||||
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles property-border-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
|
||||
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX) --postcode-boundary-match "$(POSTCODES_PQ)::$(PC_BOUNDARIES)"
|
||||
# Convenience entry point: bring $(MERGE_STAMP) up to date, then run the output
# validator over both merged parquet files. The parquet files are order-only
# prerequisites (right of `|`), so they are required but not timestamp-compared.
merge: $(MERGE_STAMP) | $(POSTCODES_PQ) $(PROPERTIES_PQ)
	$(VALIDATE_OUTPUTS) \
		--parquet $(POSTCODES_PQ) \
		--parquet $(PROPERTIES_PQ)
|
||||
enrich-actual-listings: $(ACTUAL_LISTINGS_ENRICHED)
|
||||
tiles: $(TILES) $(SATELLITE_TILES)
|
||||
tiles: $(TILES) $(SATELLITE_TILES) $(SATELLITE_HIGHRES_TILES)
|
||||
satellite-tiles: $(SATELLITE_TILES)
|
||||
overlay-tiles: noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles
|
||||
satellite-highres-tiles: $(SATELLITE_HIGHRES_TILES)
|
||||
overlay-tiles: noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles property-border-tiles
|
||||
noise-overlay-tiles: $(NOISE_OVERLAY_TILES)
|
||||
crime-hotspot-tiles: $(CRIME_HOTSPOT_TILES)
|
||||
tree-overlay-tiles: $(TREE_OVERLAY_TILES)
|
||||
property-border-tiles: $(PROPERTY_BORDER_TILES)
|
||||
download-arcgis: $(ARCGIS)
|
||||
download-price-paid: $(PRICE_PAID)
|
||||
download-deprivation: $(IOD)
|
||||
|
|
@ -152,9 +157,9 @@ download-greenspace: $(GREENSPACE)
|
|||
download-os-greenspace: $(OS_GREENSPACE)
|
||||
download-pbf: $(PBF)
|
||||
download-fr-tow: $(FR_TOW)
|
||||
download-nfi: $(NFI)
|
||||
download-ofs-register: $(OFS_REGISTER)
|
||||
download-places: $(PLACES)
|
||||
download-lsoa-population: $(LSOA_POP)
|
||||
download-median-age: $(MEDIAN_AGE)
|
||||
download-election-results: $(ELECTION)
|
||||
download-england-boundary: $(ENGLAND_BOUNDARY)
|
||||
|
|
@ -169,11 +174,12 @@ transform-school-proximity: $(SCHOOL_PROX)
|
|||
transform-tree-density: $(TREE_DENSITY_PC)
|
||||
generate-postcode-boundaries: $(PC_BOUNDARIES_STAMP)
|
||||
|
||||
$(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(PC_BOUNDARIES_DEPS)
|
||||
$(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(ARCGIS) $(PC_BOUNDARIES_DEPS)
|
||||
@rm -f $@
|
||||
$(VALIDATE_OUTPUTS) --dir $(INSPIRE_DIR) --zip-glob "$(INSPIRE_DIR)::*.zip"
|
||||
uv run python -m pipeline.transform.postcode_boundaries \
|
||||
--uprn $(UPRN_LOOKUP) \
|
||||
--arcgis $(ARCGIS) \
|
||||
--oa-boundaries $(OA_BOUNDARIES) \
|
||||
--inspire $(INSPIRE_DIR) \
|
||||
--output $(PC_BOUNDARIES)
|
||||
|
|
@ -196,6 +202,14 @@ $(TILES): $(PMTILES_BIN)
|
|||
# Produce $(SATELLITE_TILES) with pipeline.download.satellite_tiles. Rebuilt when
# the pmtiles binary or either downloader module changes; extra CLI flags can be
# supplied through SATELLITE_TILE_ARGS.
$(SATELLITE_TILES): $(PMTILES_BIN) \
    pipeline/download/satellite_tiles.py \
    pipeline/download/tiles.py
	uv run python -m pipeline.download.satellite_tiles \
		--output $@ \
		--pmtiles-bin $(PMTILES_BIN) \
		--pmtiles-version $(PMTILES_VERSION) \
		$(SATELLITE_TILE_ARGS)
|
||||
|
||||
# High-resolution EA Vertical Aerial Photography (London, OGL v3.0).
# Built as part of `tiles`, and therefore `prepare`. This step is heavy and
# needs Docker for ECW decoding: the gdal-ecw image is built first, then about
# 25 GB of ECW is downloaded. The server treats the output as optional, so a
# stale or missing file degrades to the Sentinel base layer.
# Extra CLI flags can be supplied through SATELLITE_HIGHRES_ARGS.
$(SATELLITE_HIGHRES_TILES): $(PMTILES_BIN) \
    pipeline/download/satellite_highres.py \
    pipeline/download/tiles.py \
    docker/gdal-ecw/Dockerfile
	docker build -t $(GDAL_ECW_IMAGE) docker/gdal-ecw
	uv run python -m pipeline.download.satellite_highres \
		--output $@ \
		--pmtiles-bin $(PMTILES_BIN) \
		--pmtiles-version $(PMTILES_VERSION) \
		--gdal-image $(GDAL_ECW_IMAGE) \
		$(SATELLITE_HIGHRES_ARGS)
|
||||
|
||||
# EPC requires manual registration — fail with instructions
|
||||
$(EPC):
|
||||
@echo ""
|
||||
|
|
@ -238,6 +252,11 @@ $(FR_TOW):
|
|||
curl -L -A "Mozilla/5.0" -o $@.tmp "https://www.mediafire.com/file_premium/p5fve6wswwwjqrq/FR_TOW_V1_ALL.zip/file"
|
||||
mv $@.tmp $@
|
||||
|
||||
# Download the NFI woodland (England) archive as a shapefile zip in EPSG:27700
# (see the format/spatialRefId query parameters).
#
# `-f` makes curl exit non-zero on an HTTP error instead of saving the server's
# error page; without it a failed request would be moved into place as a
# "valid" zip and Make would consider the target up to date. The download goes
# to $@.tmp first so an interrupted transfer never leaves a truncated target.
$(NFI):
	@mkdir -p $(DATA_DIR)
	curl -fL -A "Mozilla/5.0" -o $@.tmp "https://opendata.arcgis.com/api/v3/datasets/83ff06ae0fd34452af9efaddd9d221e8_0/downloads/data?format=shp&spatialRefId=27700"
	mv $@.tmp $@
|
||||
|
||||
$(OFS_REGISTER):
|
||||
@mkdir -p $(DATA_DIR)
|
||||
curl -fL -A "Mozilla/5.0" -o $@.tmp https://register-api.officeforstudents.org.uk/api/Download/
|
||||
|
|
@ -276,8 +295,11 @@ $(NOISE_OVERLAY_TILES): $(PMTILES_BIN) pipeline/transform/noise_overlay_tiles.py
|
|||
# Build the crime hotspot tiles from the contents of $(CRIME_DIR). Depends on
# the crime download stamp and on both transform modules listed below.
$(CRIME_HOTSPOT_TILES): $(CRIME_STAMP) \
    pipeline/transform/crime_hotspot_tiles.py \
    pipeline/transform/crime.py
	uv run python -m pipeline.transform.crime_hotspot_tiles \
		--input $(CRIME_DIR) \
		--output $@
|
||||
|
||||
$(TREE_OVERLAY_TILES): $(FR_TOW) pipeline/transform/tree_overlay_tiles.py pipeline/transform/tree_density.py
|
||||
uv run python -m pipeline.transform.tree_overlay_tiles --tow-zip $(FR_TOW) --output $@
|
||||
$(TREE_OVERLAY_TILES): $(FR_TOW) $(NFI) pipeline/transform/tree_overlay_tiles.py pipeline/transform/tree_density.py
|
||||
uv run python -m pipeline.transform.tree_overlay_tiles --tow-zip $(FR_TOW) --nfi-zip $(NFI) --output $@
|
||||
|
||||
# Build the property-border tiles from $(INSPIRE_DIR). Depends on the INSPIRE
# stamp file (not the directory itself) plus the two modules listed below.
$(PROPERTY_BORDER_TILES): $(INSPIRE_STAMP) \
    pipeline/transform/property_border_tiles.py \
    pipeline/transform/postcode_boundaries/inspire.py
	uv run python -m pipeline.transform.property_border_tiles \
		--inspire $(INSPIRE_DIR) \
		--output $@
|
||||
|
||||
$(INSPIRE_STAMP): $(INSPIRE_DOWNLOAD_DEPS)
|
||||
@rm -f $@
|
||||
|
|
@ -309,9 +331,6 @@ $(OS_GREENSPACE):
|
|||
# Build $(PLACES) with pipeline.download.places from the OSM extract, the
# England boundary, NaPTAN and the OfS register. $(ARCGIS) is passed as the
# --postcodes input.
$(PLACES): $(PBF) $(ENGLAND_BOUNDARY) $(NAPTAN) $(OFS_REGISTER) $(ARCGIS)
	uv run python -m pipeline.download.places \
		--output $@ \
		--pbf $(PBF) \
		--boundary $(ENGLAND_BOUNDARY) \
		--naptan $(NAPTAN) \
		--university-register $(OFS_REGISTER) \
		--postcodes $(ARCGIS)
|
||||
|
||||
$(LSOA_POP):
|
||||
uv run python -m pipeline.download.lsoa_population --output $@
|
||||
|
||||
|
||||
# Fetch $(MEDIAN_AGE) with pipeline.download.median_age. The rule has no
# prerequisites, so it only runs when the output file is missing.
$(MEDIAN_AGE):
	uv run python -m pipeline.download.median_age \
		--output $@
|
||||
|
|
@ -339,13 +358,9 @@ $(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(GIAS) $(OFSTE
|
|||
# Run pipeline.transform.join_epc_pp over $(EPC) and $(PRICE_PAID). Also
# rebuilt when the join module or the shared fuzzy_join helper changes.
$(EPC_PP): $(PRICE_PAID) $(EPC) \
    pipeline/transform/join_epc_pp.py \
    pipeline/utils/fuzzy_join.py
	uv run python -m pipeline.transform.join_epc_pp \
		--epc $(EPC) \
		--price-paid $(PRICE_PAID) \
		--output $@
|
||||
|
||||
$(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(LSOA_LOOKUP) pipeline/transform/crime.py
|
||||
$(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(PC_BOUNDARIES) pipeline/transform/crime_spatial.py pipeline/transform/postcode_boundaries/loader.py pipeline/transform/crime.py
|
||||
$(VALIDATE_OUTPUTS) --file $(CRIME_DIR)/archive_manifest.json --glob "$(CRIME_DIR)::**/*-street.csv"
|
||||
uv run python -m pipeline.transform.crime --input $(CRIME_DIR) --output $(CRIME) --output-by-year $(CRIME_BY_YEAR) --lsoa-lookup $(LSOA_LOOKUP)
|
||||
|
||||
$(LSOA_LOOKUP): pipeline/download/lsoa_2011_to_2021.py
|
||||
uv run python -m pipeline.download.lsoa_2011_to_2021 --output $@
|
||||
$(VALIDATE_OUTPUTS) --parquet $@
|
||||
uv run python -m pipeline.transform.crime_spatial --input $(CRIME_DIR) --boundaries $(PC_BOUNDARIES)/units --output $(CRIME) --output-by-year $(CRIME_BY_YEAR)
|
||||
|
||||
# Run pipeline.transform.poi_proximity with the ArcGIS data, the filtered POIs
# and the OS greenspace file as inputs. Code dependencies come from
# $(POI_PROXIMITY_DEPS).
$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) \
    $(POI_PROXIMITY_DEPS)
	uv run python -m pipeline.transform.poi_proximity \
		--arcgis $(ARCGIS) \
		--pois $(POIS_FILTERED) \
		--greenspace $(OS_GREENSPACE) \
		--output $@
|
||||
|
|
@ -353,9 +368,10 @@ $(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) $(POI_PROXIMITY_DE
|
|||
# Run pipeline.transform.school_proximity with the Ofsted and ArcGIS files as
# inputs. Also rebuilt when the transform module or the shared poi_counts
# helper changes.
$(SCHOOL_PROX): $(OFSTED) $(ARCGIS) \
    pipeline/transform/school_proximity.py \
    pipeline/utils/poi_counts.py
	uv run python -m pipeline.transform.school_proximity \
		--ofsted $(OFSTED) \
		--arcgis $(ARCGIS) \
		--output $@
|
||||
|
||||
$(TREE_DENSITY_PC): $(FR_TOW) $(ARCGIS) $(PRICE_PAID) $(TREE_DENSITY_DEPS)
|
||||
$(TREE_DENSITY_PC): $(FR_TOW) $(NFI) $(ARCGIS) $(PRICE_PAID) $(TREE_DENSITY_DEPS)
|
||||
uv run python -m pipeline.transform.tree_density \
|
||||
--tow-zip $(FR_TOW) \
|
||||
--nfi-zip $(NFI) \
|
||||
--arcgis $(ARCGIS) \
|
||||
--price-paid $(PRICE_PAID) \
|
||||
--output-postcodes $(TREE_DENSITY_PC) \
|
||||
|
|
@ -371,6 +387,7 @@ $(PC_BOUNDARIES):
|
|||
@echo "Generate it with:"
|
||||
@echo " uv run python -m pipeline.transform.postcode_boundaries \\"
|
||||
@echo " --uprn $(UPRN_LOOKUP) \\"
|
||||
@echo " --arcgis $(ARCGIS) \\"
|
||||
@echo " --oa-boundaries $(OA_BOUNDARIES) \\"
|
||||
@echo " --inspire $(INSPIRE_DIR) \\"
|
||||
@echo " --output $@"
|
||||
|
|
@ -380,7 +397,7 @@ $(PC_BOUNDARIES):
|
|||
# ── Final merge → postcode.parquet + properties.parquet ──────────────────────
|
||||
|
||||
$(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) $(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) $(MERGE_DEPS)
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) $(MERGE_DEPS)
|
||||
@rm -f $@
|
||||
uv run python -m pipeline.transform.merge \
|
||||
--epc-pp $(EPC_PP) \
|
||||
|
|
@ -395,7 +412,6 @@ $(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
|||
--conservation-areas $(CONSERVATION_AREAS) \
|
||||
--listed-buildings $(LISTED_BUILDINGS) \
|
||||
--rental-prices $(RENTAL) \
|
||||
--lsoa-population $(LSOA_POP) \
|
||||
--median-age $(MEDIAN_AGE) \
|
||||
--election-results $(ELECTION) \
|
||||
--tree-density-postcodes $(TREE_DENSITY_PC) \
|
||||
|
|
@ -423,7 +439,7 @@ $(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(EPC) \
|
|||
$(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) \
|
||||
$(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) \
|
||||
$(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) \
|
||||
$(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) \
|
||||
$(MERGE_DEPS) pipeline/utils/fuzzy_join.py
|
||||
uv run python -m pipeline.transform.merge \
|
||||
--epc-pp $(EPC_PP) \
|
||||
|
|
@ -438,7 +454,6 @@ $(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(EPC) \
|
|||
--conservation-areas $(CONSERVATION_AREAS) \
|
||||
--listed-buildings $(LISTED_BUILDINGS) \
|
||||
--rental-prices $(RENTAL) \
|
||||
--lsoa-population $(LSOA_POP) \
|
||||
--median-age $(MEDIAN_AGE) \
|
||||
--election-results $(ELECTION) \
|
||||
--tree-density-postcodes $(TREE_DENSITY_PC) \
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue