This commit is contained in:
Andras Schmelczer 2026-05-28 21:48:35 +01:00
parent 39ef5c6646
commit c995f12f8b
78 changed files with 4830 additions and 1619 deletions

View file

@ -13,6 +13,7 @@ FINDER_DATA := ./finder/data
# ── Output files ──────────────────────────────────────────────────────────────
# All paths below live under $(DATA_DIR).
# Written by pipeline.download.tiles (passed as --output).
TILES := $(DATA_DIR)/uk.pmtiles
# Written by pipeline.download.satellite_tiles (passed as --output).
SATELLITE_TILES := $(DATA_DIR)/satellite.pmtiles
# Passed as --arcgis to the noise download and to the merge transform.
ARCGIS := $(DATA_DIR)/arcgis_data.parquet
PRICE_PAID := $(DATA_DIR)/price-paid-complete.parquet
# Passed as --iod to the merge transform.
IOD := $(DATA_DIR)/IoD2025_Scores.parquet
@ -44,7 +45,7 @@ OFSTED := $(DATA_DIR)/ofsted.parquet
GIAS := $(DATA_DIR)/gias.parquet
NAPTAN := $(DATA_DIR)/naptan.parquet
BROADBAND := $(DATA_DIR)/broadband.parquet
# Conservation areas are kept as GeoJSON, listed buildings as GeoPackage.
# Each variable is assigned exactly once so the effective value is unambiguous.
CONSERVATION_AREAS := $(DATA_DIR)/conservation_areas.geojson
LISTED_BUILDINGS := $(DATA_DIR)/listed_buildings.gpkg
SCHOOL_PROX := $(DATA_DIR)/school_proximity.parquet
RENTAL := $(DATA_DIR)/rental_prices.parquet
@ -77,6 +78,8 @@ INSPIRE_STAMP := $(INSPIRE_DIR)/.done
MAP_ASSETS_STAMP := $(MAP_ASSETS_DIR)/.done
# Version string handed to the pipeline scripts as --pmtiles-version and to
# ensure_pmtiles_cli in the $(PMTILES_BIN) rule.
PMTILES_VERSION := 1.22.3
# Path of the pmtiles CLI; it is the target of its own rule and is passed to
# tile-building scripts as --pmtiles-bin.
PMTILES_BIN := $(DATA_DIR)/pmtiles
# Extra arguments appended to the satellite_tiles command line. `?=` leaves any
# value supplied by the caller (command line or environment) in place; the
# default is empty.
SATELLITE_TILE_ARGS ?=
# Command prefix for the output validator; recipes append `--parquet <file>`.
VALIDATE_OUTPUTS := uv run python -m pipeline.validate_outputs
@ -101,7 +104,7 @@ MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_
# ── Phony aliases ─────────────────────────────────────────────────────────────
.PHONY: prepare merge tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles \
.PHONY: prepare merge tiles satellite-tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles \
download-arcgis download-price-paid download-deprivation download-ethnicity \
download-naptan download-pois download-grocery-retail-points download-ofsted download-gias download-broadband download-conservation-areas download-listed-buildings download-rental-prices \
download-postcodes download-noise download-inspire download-crime \
@ -111,12 +114,13 @@ MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_
transform-school-proximity transform-tree-density \
generate-postcode-boundaries generate-travel-times enrich-actual-listings
# Top-level goal: build the price stamp, places, all tile sets, postcode
# boundaries, map assets and travel times, then validate the final parquet files.
# The files after `|` are order-only prerequisites: they must exist before the
# recipe runs, but their timestamps alone do not make this goal out of date.
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles \
    generate-postcode-boundaries download-map-assets generate-travel-times \
    | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
	$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX)
# Run the merge step (tracked by $(MERGE_STAMP)) and validate its two parquet
# outputs. The parquet files are order-only: they must exist, but only the
# stamp drives rebuilds.
merge: $(MERGE_STAMP) | $(POSTCODES_PQ) $(PROPERTIES_PQ)
	$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ)
# Short goal names that forward to the files actually being built.
enrich-actual-listings: $(ACTUAL_LISTINGS_ENRICHED)
# `tiles` builds both $(TILES) and $(SATELLITE_TILES).
tiles: $(TILES) $(SATELLITE_TILES)
# Build only $(SATELLITE_TILES).
satellite-tiles: $(SATELLITE_TILES)
# Umbrella goal covering every overlay tile set.
overlay-tiles: noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles
noise-overlay-tiles: $(NOISE_OVERLAY_TILES)
crime-hotspot-tiles: $(CRIME_HOTSPOT_TILES)
@ -183,9 +187,15 @@ generate-travel-times: $(ARCGIS) $(PLACES) $(PBF) download-transit-network
# ── Downloads ─────────────────────────────────────────────────────────────────
# Make sure the pmtiles CLI exists at $(PMTILES_BIN) by calling
# ensure_pmtiles_cli(path, version) from pipeline/download/tiles.py.
# Re-runs whenever that script changes.
$(PMTILES_BIN): pipeline/download/tiles.py
	uv run python -c 'from pathlib import Path; from pipeline.download.tiles import ensure_pmtiles_cli; ensure_pmtiles_cli(Path("$(PMTILES_BIN)"), "$(PMTILES_VERSION)")'

# Build $(TILES) with pipeline.download.tiles; the CLI target is a prerequisite
# so it is in place before the script runs. Invoked as `uv run python -m` to
# match every other recipe in this file.
$(TILES): $(PMTILES_BIN)
	uv run python -m pipeline.download.tiles --output $@ --pmtiles-version $(PMTILES_VERSION)
# Build $(SATELLITE_TILES) with pipeline.download.satellite_tiles.
# Rebuilds when the pmtiles CLI target or either download script changes.
# $(SATELLITE_TILE_ARGS) is appended verbatim so callers can pass extra options.
$(SATELLITE_TILES): $(PMTILES_BIN) \
    pipeline/download/satellite_tiles.py pipeline/download/tiles.py
	uv run python -m pipeline.download.satellite_tiles --output $@ --pmtiles-bin $(PMTILES_BIN) --pmtiles-version $(PMTILES_VERSION) $(SATELLITE_TILE_ARGS)
# EPC requires manual registration — fail with instructions
$(EPC):
@echo ""
@ -260,8 +270,8 @@ $(POSTCODES_RAW):
# Produce $(NOISE) by running pipeline.download.noise against the ArcGIS data.
# Rebuilds when $(ARCGIS) or the download script changes.
$(NOISE): $(ARCGIS) pipeline/download/noise.py
	uv run python -m pipeline.download.noise --arcgis $(ARCGIS) --output $@
# Build the noise overlay tile set with pipeline.transform.noise_overlay_tiles.
# The pmtiles CLI is a prerequisite and is passed through $(PMTILES_BIN) rather
# than as a hard-coded path; rasters are read from/written to
# $(DATA_DIR)/noise_overlay_rasters via --raster-dir.
$(NOISE_OVERLAY_TILES): $(PMTILES_BIN) \
    pipeline/transform/noise_overlay_tiles.py pipeline/download/noise.py pipeline/download/tiles.py
	uv run python -m pipeline.transform.noise_overlay_tiles --output $@ --raster-dir $(DATA_DIR)/noise_overlay_rasters --pmtiles-bin $(PMTILES_BIN) --pmtiles-version $(PMTILES_VERSION)
# Build the crime hotspot tile set from $(CRIME_DIR) with
# pipeline.transform.crime_hotspot_tiles. Rebuilds when $(CRIME_STAMP) or either
# crime transform script changes.
$(CRIME_HOTSPOT_TILES): $(CRIME_STAMP) pipeline/transform/crime_hotspot_tiles.py pipeline/transform/crime.py
uv run python -m pipeline.transform.crime_hotspot_tiles --input $(CRIME_DIR) --output $@
@ -409,12 +419,30 @@ $(PRICES_STAMP): $(MERGE_STAMP) $(PRICE_INDEX) $(PRICE_ESTIMATE_DEPS) | $(PROPER
$(VALIDATE_OUTPUTS) --parquet $(PROPERTIES_PQ) --parquet $(POSTCODES_PQ) --parquet $(PRICE_INDEX)
@touch $@
# Build the enriched actual-listings parquet.
# The recipe runs pipeline.transform.merge with the same feature inputs it is
# given below, plus --actual-listings and --epc, and writes the result to $@
# through --output-listings. The target is then checked by the output validator.
# Every file passed on the command line is also listed as a prerequisite, along
# with $(MERGE_DEPS) and the fuzzy-join helper, so a change to any of them
# triggers a rebuild.
$(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(EPC) \
    $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
    $(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) \
    $(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) \
    $(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) \
    $(MERGE_DEPS) pipeline/utils/fuzzy_join.py
	uv run python -m pipeline.transform.merge \
		--epc-pp $(EPC_PP) \
		--arcgis $(ARCGIS) \
		--iod $(IOD) \
		--poi-proximity $(POI_PROXIMITY) \
		--ethnicity $(ETHNICITY) \
		--crime $(CRIME) \
		--noise $(NOISE) \
		--school-proximity $(SCHOOL_PROX) \
		--broadband $(BROADBAND) \
		--conservation-areas $(CONSERVATION_AREAS) \
		--listed-buildings $(LISTED_BUILDINGS) \
		--rental-prices $(RENTAL) \
		--lsoa-population $(LSOA_POP) \
		--median-age $(MEDIAN_AGE) \
		--election-results $(ELECTION) \
		--tree-density-postcodes $(TREE_DENSITY_PC) \
		--actual-listings $(ACTUAL_LISTINGS_RAW) \
		--epc $(EPC) \
		--output-listings $@
	$(VALIDATE_OUTPUTS) --parquet $@