# perfect-postcode/Makefile.data
# Snapshot metadata: 2026-05-28 21:48:35 +01:00, 448 lines, 22 KiB, Text

# Usage:
# make -f Makefile.data prepare # Build all parquets (+ all deps)
#
# Or include from the main Makefile and use targets directly.
# Recipes are executed by bash rather than make's default shell.
SHELL       := /bin/bash

# Delete a target whose recipe exits non-zero, so a half-written file is never
# treated as up to date on the next run.
.DELETE_ON_ERROR:

# Root directories; all are relative to the directory make is invoked from.
DATA_DIR    := ./property-data
MANUAL_DATA := ./manual-data
FINDER_DATA := ./finder/data
# ── Output files ──────────────────────────────────────────────────────────────
# One variable per produced artefact. Definitions use := (expanded immediately),
# so a variable must be defined before any later variable that references it.

# Base map and satellite tile archives
TILES                    := $(DATA_DIR)/uk.pmtiles
SATELLITE_TILES          := $(DATA_DIR)/satellite.pmtiles

# Core tabular inputs and merged outputs
ARCGIS                   := $(DATA_DIR)/arcgis_data.parquet
PRICE_PAID               := $(DATA_DIR)/price-paid-complete.parquet
IOD                      := $(DATA_DIR)/IoD2025_Scores.parquet
POIS_RAW                 := $(DATA_DIR)/uk_pois.parquet
GROCERY_RETAIL_POINTS    := $(DATA_DIR)/geolytix_retail_points.parquet
POIS_FILTERED            := $(DATA_DIR)/filtered_uk_pois.parquet
POI_PROXIMITY            := $(DATA_DIR)/poi_proximity.parquet
EPC_PP                   := $(DATA_DIR)/epc_pp.parquet
POSTCODES_RAW            := $(DATA_DIR)/gb-postcodes-v5
POSTCODES_PQ             := $(DATA_DIR)/postcode.parquet
PROPERTIES_PQ            := $(DATA_DIR)/properties.parquet
MERGE_STAMP              := $(DATA_DIR)/.merge_done
PRICE_INDEX              := $(DATA_DIR)/price_index.parquet
PRICES_STAMP             := $(DATA_DIR)/.prices_done
EPC                      := $(MANUAL_DATA)/domestic-csv.zip
ACTUAL_LISTINGS_RAW      := $(FINDER_DATA)/online_listings_buy.parquet
ACTUAL_LISTINGS_ENRICHED := $(FINDER_DATA)/online_listings_buy_enriched.parquet
ETHNICITY                := $(DATA_DIR)/ethnicity_by_la.parquet

# Crime (raw download directory, stamp, and derived tables)
CRIME_DIR                := $(DATA_DIR)/crime
CRIME                    := $(DATA_DIR)/crime_by_lsoa.parquet
CRIME_BY_YEAR            := $(DATA_DIR)/crime_by_year_by_lsoa.parquet
CRIME_STAMP              := $(CRIME_DIR)/.downloaded
LSOA_LOOKUP              := $(DATA_DIR)/lsoa_2011_to_2021.parquet

# Noise table and map overlay tile archives
NOISE                    := $(DATA_DIR)/road_noise.parquet
NOISE_OVERLAY_TILES      := $(DATA_DIR)/noise_lden_10m.pmtiles
CRIME_HOTSPOT_TILES      := $(DATA_DIR)/crime_hotspots.pmtiles
TREE_OVERLAY_TILES       := $(DATA_DIR)/trees_outside_woodlands.pmtiles

# Schools, transport, connectivity, heritage, rental
OFSTED                   := $(DATA_DIR)/ofsted.parquet
GIAS                     := $(DATA_DIR)/gias.parquet
NAPTAN                   := $(DATA_DIR)/naptan.parquet
BROADBAND                := $(DATA_DIR)/broadband.parquet
CONSERVATION_AREAS       := $(DATA_DIR)/conservation_areas.geojson
LISTED_BUILDINGS         := $(DATA_DIR)/listed_buildings.gpkg
SCHOOL_PROX              := $(DATA_DIR)/school_proximity.parquet
RENTAL                   := $(DATA_DIR)/rental_prices.parquet

# Postcode boundary generation inputs and outputs
INSPIRE_DIR              := $(DATA_DIR)/inspire
OA_BOUNDARIES            := $(DATA_DIR)/oa_boundaries.gpkg
UPRN_LOOKUP              := $(DATA_DIR)/uprn_lookup.parquet
PC_BOUNDARIES            := $(DATA_DIR)/postcode_boundaries
PC_BOUNDARIES_STAMP      := $(PC_BOUNDARIES)/.done

# Transit network download and R5 network cache
TRANSIT_DIR              := $(DATA_DIR)/transit
TRANSIT_STAMP            := $(TRANSIT_DIR)/.done
R5_NETWORK_CACHE         := $(DATA_DIR)/r5-network/network.dat

# Greenspace, OSM extract, and tree data
GREENSPACE               := $(DATA_DIR)/greenspace_water.parquet
OS_GREENSPACE            := $(DATA_DIR)/os_greenspace.parquet
PBF                      := $(DATA_DIR)/england-latest.osm.pbf
FR_TOW                   := $(DATA_DIR)/FR_TOW_V1_ALL.zip
TREE_DENSITY_PC          := $(DATA_DIR)/tree_density_by_postcode.parquet
TREE_DENSITY_STREETS     := $(DATA_DIR)/tree_density_by_street.parquet
TREE_DENSITY_ADDR        := $(DATA_DIR)/tree_density_by_address.parquet

# Places, demographics, elections, boundary
OFS_REGISTER             := $(DATA_DIR)/ofs_register.xlsx
PLACES                   := $(DATA_DIR)/places.parquet
LSOA_POP                 := $(DATA_DIR)/lsoa_population.parquet
MEDIAN_AGE               := $(DATA_DIR)/median_age.parquet
ELECTION                 := $(DATA_DIR)/election_results.parquet
ENGLAND_BOUNDARY         := $(DATA_DIR)/england_boundary.geojson

# Artefacts written into the frontend tree rather than DATA_DIR
RM_OUTCODES              := frontend/src/lib/rightmove-outcodes.json
MAP_ASSETS_DIR           := frontend/public/assets
# Sentinel files for directory targets (Make doesn't track directories well)
INSPIRE_STAMP    := $(INSPIRE_DIR)/.done
MAP_ASSETS_STAMP := $(MAP_ASSETS_DIR)/.done

# pmtiles CLI: version handed to the download helpers, and path of the binary.
PMTILES_VERSION  := 1.22.3
PMTILES_BIN      := $(DATA_DIR)/pmtiles

# Extra arguments appended to the satellite tile download command.
# Empty by default; ?= keeps any value already set by the caller.
SATELLITE_TILE_ARGS ?=

# Command prefix for the output validator invoked by many recipes.
VALIDATE_OUTPUTS := uv run python -m pipeline.validate_outputs

# Script prerequisites: editing any listed script makes the outputs that
# depend on it out of date.
POI_PROXIMITY_DEPS := \
  pipeline/transform/poi_proximity.py \
  pipeline/utils/poi_counts.py
MERGE_DEPS := pipeline/transform/merge.py
PRICE_INDEX_DEPS := $(addprefix pipeline/transform/price_estimation/,\
  index.py shrinkage.py utils.py)
PRICE_ESTIMATE_DEPS := $(addprefix pipeline/transform/price_estimation/,\
  estimate.py knn.py utils.py)
TREE_DENSITY_DEPS := pipeline/transform/tree_density.py
PC_BOUNDARIES_DEPS := $(addprefix pipeline/transform/postcode_boundaries/,\
  __main__.py greenspace.py inspire.py memory.py oa_boundaries.py \
  output.py process_oa.py uprn.py voronoi.py)
CRIME_DOWNLOAD_DEPS := pipeline/download/crime.py
INSPIRE_DOWNLOAD_DEPS := pipeline/download/inspire.py
TRANSIT_DOWNLOAD_DEPS := \
  pipeline/download/transit_network.py \
  pipeline/download/transxchange2gtfs_shim.js
MAP_ASSETS_DEPS := \
  pipeline/download/map_assets.py \
  pipeline/transform/transform_poi.py
# ── Phony aliases ─────────────────────────────────────────────────────────────
# Command-style targets that do not name a file. Declaring them phony means a
# stray file with the same name can never make them look up to date.
# download-election-results was previously missing from this list.
.PHONY: prepare merge tiles satellite-tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles \
  download-arcgis download-price-paid download-deprivation download-ethnicity \
  download-naptan download-pois download-grocery-retail-points download-ofsted download-gias download-broadband download-conservation-areas download-listed-buildings download-rental-prices \
  download-postcodes download-noise download-inspire download-crime \
  download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-fr-tow download-ofs-register download-places download-lsoa-population download-median-age download-election-results download-england-boundary download-rightmove-outcodes \
  download-map-assets \
  transform-pois transform-epc-pp transform-crime transform-poi-proximity \
  transform-school-proximity transform-tree-density \
  generate-postcode-boundaries generate-travel-times enrich-actual-listings
# Full build: price estimates, places, all tile sets, postcode boundaries, map
# assets and travel times. The parquets after `|` are order-only prerequisites
# (they must exist, but their timestamps never trigger this target); the recipe
# validates them once every prerequisite has been built.
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles \
         generate-postcode-boundaries download-map-assets generate-travel-times \
         | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
	$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX)

# Run the merge step only, then validate its two parquet outputs.
merge: $(MERGE_STAMP) | $(POSTCODES_PQ) $(PROPERTIES_PQ)
	$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ)

# Recipe-less aliases: each simply requests the file target(s) it names.
enrich-actual-listings: $(ACTUAL_LISTINGS_ENRICHED)

tiles: $(TILES) $(SATELLITE_TILES)

satellite-tiles: $(SATELLITE_TILES)

overlay-tiles: noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles

noise-overlay-tiles: $(NOISE_OVERLAY_TILES)

crime-hotspot-tiles: $(CRIME_HOTSPOT_TILES)

tree-overlay-tiles: $(TREE_OVERLAY_TILES)
# Download aliases. Most are recipe-less and just request one file target.
# Aliases for stamp-tracked directories (crime, inspire, transit, map assets)
# also carry a recipe; because the alias is phony, that validation runs on
# every invocation, even when the stamp is already up to date.
download-arcgis: $(ARCGIS)

download-price-paid: $(PRICE_PAID)

download-deprivation: $(IOD)

download-ethnicity: $(ETHNICITY)

download-crime: $(CRIME_STAMP)
	$(VALIDATE_OUTPUTS) --file $(CRIME_DIR)/archive_manifest.json --glob "$(CRIME_DIR)::**/*-street.csv"

download-naptan: $(NAPTAN)

download-pois: $(POIS_RAW)

download-grocery-retail-points: $(GROCERY_RETAIL_POINTS)

download-ofsted: $(OFSTED)

download-gias: $(GIAS)

download-broadband: $(BROADBAND)

download-conservation-areas: $(CONSERVATION_AREAS)

download-listed-buildings: $(LISTED_BUILDINGS)

download-postcodes: $(POSTCODES_RAW)

download-rental-prices: $(RENTAL)

download-noise: $(NOISE)

download-inspire: $(INSPIRE_STAMP)
	$(VALIDATE_OUTPUTS) --dir $(INSPIRE_DIR) --zip-glob "$(INSPIRE_DIR)::*.zip"

download-oa-boundaries: $(OA_BOUNDARIES)

download-uprn-lookup: $(UPRN_LOOKUP)

download-transit-network: $(TRANSIT_STAMP)
	$(VALIDATE_OUTPUTS) --file $(TRANSIT_DIR)/raw/england.osm.pbf --zip $(TRANSIT_DIR)/bods_gtfs.zip --zip $(TRANSIT_DIR)/tfl_gtfs.zip

download-greenspace: $(GREENSPACE)

download-os-greenspace: $(OS_GREENSPACE)

download-pbf: $(PBF)

download-fr-tow: $(FR_TOW)

download-ofs-register: $(OFS_REGISTER)

download-places: $(PLACES)

download-lsoa-population: $(LSOA_POP)

download-median-age: $(MEDIAN_AGE)

download-election-results: $(ELECTION)

download-england-boundary: $(ENGLAND_BOUNDARY)

download-rightmove-outcodes: $(RM_OUTCODES)

download-map-assets: $(MAP_ASSETS_STAMP)
	$(VALIDATE_OUTPUTS) --file $(MAP_ASSETS_DIR)/sprites/light.json --file $(MAP_ASSETS_DIR)/sprites/light.png --file $(MAP_ASSETS_DIR)/sprites/dark.json --file $(MAP_ASSETS_DIR)/sprites/dark.png --glob "$(MAP_ASSETS_DIR)/fonts::**/*.pbf" --glob "$(MAP_ASSETS_DIR)/twemoji::*.png" --glob "$(MAP_ASSETS_DIR)/poi-icons::**/*"

# Transform aliases (recipe-less).
transform-pois: $(POIS_FILTERED)

transform-epc-pp: $(EPC_PP)

transform-crime: $(CRIME)

transform-poi-proximity: $(POI_PROXIMITY)

transform-school-proximity: $(SCHOOL_PROX)

transform-tree-density: $(TREE_DENSITY_PC)
# Alias for the postcode boundary stamp.
generate-postcode-boundaries: $(PC_BOUNDARIES_STAMP)

# Build the postcode boundaries directory, tracked through a stamp file.
# The stamp is removed first and touched only as the final step, so a run that
# fails part-way leaves no stamp and is retried on the next invocation.
# The INSPIRE download is validated before the generator runs.
$(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(PC_BOUNDARIES_DEPS)
	@rm -f $@
	$(VALIDATE_OUTPUTS) --dir $(INSPIRE_DIR) --zip-glob "$(INSPIRE_DIR)::*.zip"
	uv run python -m pipeline.transform.postcode_boundaries \
	    --uprn $(UPRN_LOOKUP) \
	    --oa-boundaries $(OA_BOUNDARIES) \
	    --inspire $(INSPIRE_DIR) \
	    --output $(PC_BOUNDARIES)
	@touch $@
# Run the R5 job via ./r5-java/run.sh.
# Before running, delete the cached R5 network if it exists and either the OSM
# extract or the transit download stamp is newer than it.
generate-travel-times: $(ARCGIS) $(PLACES) $(PBF) download-transit-network
	@if [ -f "$(R5_NETWORK_CACHE)" ] && { [ "$(PBF)" -nt "$(R5_NETWORK_CACHE)" ] || [ "$(TRANSIT_STAMP)" -nt "$(R5_NETWORK_CACHE)" ]; }; then \
		echo "R5 inputs are newer than $(R5_NETWORK_CACHE); deleting stale cache"; \
		rm -f "$(R5_NETWORK_CACHE)"; \
	fi
	./r5-java/run.sh
# ── Downloads ─────────────────────────────────────────────────────────────────
# pmtiles CLI binary, obtained by calling ensure_pmtiles_cli with the target
# path and the pinned version.
$(PMTILES_BIN): pipeline/download/tiles.py
	uv run python -c 'from pathlib import Path; from pipeline.download.tiles import ensure_pmtiles_cli; ensure_pmtiles_cli(Path("$(PMTILES_BIN)"), "$(PMTILES_VERSION)")'

# Base map tile archive. Invoked as `uv run python -m`, matching every other
# pipeline invocation in this file.
$(TILES): $(PMTILES_BIN)
	uv run python -m pipeline.download.tiles --output $@ --pmtiles-version $(PMTILES_VERSION)

# Satellite tile archive; SATELLITE_TILE_ARGS is appended to the command.
$(SATELLITE_TILES): $(PMTILES_BIN) pipeline/download/satellite_tiles.py pipeline/download/tiles.py
	uv run python -m pipeline.download.satellite_tiles --output $@ --pmtiles-bin $(PMTILES_BIN) --pmtiles-version $(PMTILES_VERSION) $(SATELLITE_TILE_ARGS)
# EPC requires manual registration — fail with instructions
# This rule runs only when the archive is missing (it has no prerequisites);
# it prints how to obtain the file and then fails the build.
$(EPC):
	@echo ""
	@echo "=== EPC dataset not found ==="
	@echo "The EPC certificates archive is required: $@"
	@echo ""
	@echo "To obtain it, register at https://get-energy-performance-data.communities.gov.uk/filter-properties?property_type=domestic"
	@echo "and place domestic-csv.zip in manual-data/"
	@echo ""
	@exit 1
# Downloads with no prerequisites: fetched once, re-fetched only if the output
# file is removed.
$(ARCGIS):
	uv run python -m pipeline.download.arcgis --output $@

$(PRICE_PAID):
	uv run python -m pipeline.download.price_paid --output $@

$(IOD):
	uv run python -m pipeline.download.deprivation_data --output $@

# Re-downloaded whenever the download script changes.
$(ETHNICITY): pipeline/download/ethnicity.py
	uv run python -m pipeline.download.ethnicity --output $@

# Crime archive directory, tracked through a stamp file. The stamp is removed
# first and touched only after the download has been validated.
$(CRIME_STAMP): $(CRIME_DOWNLOAD_DEPS)
	@rm -f $@
	uv run python -m pipeline.download.crime --output $(CRIME_DIR)
	$(VALIDATE_OUTPUTS) --file $(CRIME_DIR)/archive_manifest.json --glob "$(CRIME_DIR)::**/*-street.csv"
	@touch $@
$(NAPTAN):
	uv run python -m pipeline.download.naptan --output $@

# Direct curl downloads. Each writes to $@.tmp and renames on success, so an
# interrupted transfer never leaves a truncated file under the target name.
# -f makes curl exit non-zero on an HTTP error response instead of saving the
# server's error page as the dataset; -L follows redirects.
$(PBF):
	@mkdir -p $(DATA_DIR)
	curl -fL -o $@.tmp https://download.geofabrik.de/europe/united-kingdom/england-latest.osm.pbf
	mv $@.tmp $@

$(FR_TOW):
	@mkdir -p $(DATA_DIR)
	curl -fL -A "Mozilla/5.0" -o $@.tmp "https://www.mediafire.com/file_premium/p5fve6wswwwjqrq/FR_TOW_V1_ALL.zip/file"
	mv $@.tmp $@

$(OFS_REGISTER):
	@mkdir -p $(DATA_DIR)
	curl -fL -A "Mozilla/5.0" -o $@.tmp https://register-api.officeforstudents.org.uk/api/Download/
	mv $@.tmp $@
# Raw POIs, produced from the OSM extract and the England boundary file.
$(POIS_RAW): $(PBF) $(ENGLAND_BOUNDARY) pipeline/download/pois.py
	uv run python -m pipeline.download.pois --output $@ --pbf $(PBF) --boundary $(ENGLAND_BOUNDARY)

$(GROCERY_RETAIL_POINTS):
	uv run python -m pipeline.download.geolytix_retail_points --output $@

$(OFSTED):
	uv run python -m pipeline.download.ofsted --output $@

$(GIAS): pipeline/download/gias.py
	uv run python -m pipeline.download.gias --output $@

$(BROADBAND):
	uv run python -m pipeline.download.broadband --output $@

$(CONSERVATION_AREAS): pipeline/download/conservation_areas.py
	uv run python -m pipeline.download.conservation_areas --output $@

$(LISTED_BUILDINGS): pipeline/download/listed_buildings.py
	uv run python -m pipeline.download.listed_buildings --output $@

$(POSTCODES_RAW):
	uv run python -m pipeline.download.postcodes --output $@

# The noise download takes the ArcGIS parquet as an input.
$(NOISE): $(ARCGIS) pipeline/download/noise.py
	uv run python -m pipeline.download.noise --arcgis $(ARCGIS) --output $@
# Map overlay tile archives.
# Noise overlay: rasters are kept under $(DATA_DIR)/noise_overlay_rasters.
$(NOISE_OVERLAY_TILES): $(PMTILES_BIN) pipeline/transform/noise_overlay_tiles.py pipeline/download/noise.py pipeline/download/tiles.py
	uv run python -m pipeline.transform.noise_overlay_tiles --output $@ --raster-dir $(DATA_DIR)/noise_overlay_rasters --pmtiles-bin $(PMTILES_BIN) --pmtiles-version $(PMTILES_VERSION)

# Crime hotspots: built from the downloaded crime directory.
$(CRIME_HOTSPOT_TILES): $(CRIME_STAMP) pipeline/transform/crime_hotspot_tiles.py pipeline/transform/crime.py
	uv run python -m pipeline.transform.crime_hotspot_tiles --input $(CRIME_DIR) --output $@

# Tree overlay: built from the FR_TOW zip archive.
$(TREE_OVERLAY_TILES): $(FR_TOW) pipeline/transform/tree_overlay_tiles.py pipeline/transform/tree_density.py
	uv run python -m pipeline.transform.tree_overlay_tiles --tow-zip $(FR_TOW) --output $@
# INSPIRE directory, tracked through a stamp file: the stamp is removed first
# and touched only after the downloaded zips have been validated.
$(INSPIRE_STAMP): $(INSPIRE_DOWNLOAD_DEPS)
	@rm -f $@
	uv run python -m pipeline.download.inspire --output $(INSPIRE_DIR)
	$(VALIDATE_OUTPUTS) --dir $(INSPIRE_DIR) --zip-glob "$(INSPIRE_DIR)::*.zip"
	@touch $@

$(OA_BOUNDARIES):
	uv run python -m pipeline.download.oa_boundaries --output $@

$(UPRN_LOOKUP):
	uv run python -m pipeline.download.uprn_lookup --output $@

# Transit network directory, tracked the same way. Validation checks the OSM
# extract and both GTFS zips.
$(TRANSIT_STAMP): $(TRANSIT_DOWNLOAD_DEPS)
	@rm -f $@
	uv run python -m pipeline.download.transit_network --output $(TRANSIT_DIR)
	$(VALIDATE_OUTPUTS) --file $(TRANSIT_DIR)/raw/england.osm.pbf --zip $(TRANSIT_DIR)/bods_gtfs.zip --zip $(TRANSIT_DIR)/tfl_gtfs.zip
	@touch $@
$(RENTAL): pipeline/download/rental_prices.py
	uv run python -m pipeline.download.rental_prices --output $@

# Greenspace/water layer, produced from the OSM extract.
$(GREENSPACE): $(PBF)
	uv run python -m pipeline.download.greenspace_water --output $@ --pbf $(PBF)

$(OS_GREENSPACE):
	uv run python -m pipeline.download.os_greenspace --output $@

# Places combines the OSM extract, England boundary, NaPTAN, the OfS register
# and the ArcGIS parquet (passed as --postcodes).
$(PLACES): $(PBF) $(ENGLAND_BOUNDARY) $(NAPTAN) $(OFS_REGISTER) $(ARCGIS)
	uv run python -m pipeline.download.places --output $@ --pbf $(PBF) --boundary $(ENGLAND_BOUNDARY) --naptan $(NAPTAN) --university-register $(OFS_REGISTER) --postcodes $(ARCGIS)

$(LSOA_POP):
	uv run python -m pipeline.download.lsoa_population --output $@

$(MEDIAN_AGE):
	uv run python -m pipeline.download.median_age --output $@

$(ELECTION): pipeline/download/election_results.py
	uv run python -m pipeline.download.election_results --output $@

$(ENGLAND_BOUNDARY):
	uv run python -m pipeline.download.england_boundary --output $@

# Reads the merged postcode parquet, so it depends on the merge stamp.
$(RM_OUTCODES): $(MERGE_STAMP)
	uv run python -m pipeline.download.rightmove_outcodes --postcodes $(POSTCODES_PQ) --output $@

# Frontend map assets directory, tracked through a stamp file: the stamp is
# removed first and touched only after sprites, fonts, twemoji and POI icons
# have been validated.
$(MAP_ASSETS_STAMP): $(MAP_ASSETS_DEPS)
	@rm -f $@
	uv run python -m pipeline.download.map_assets --output $(MAP_ASSETS_DIR)
	$(VALIDATE_OUTPUTS) --file $(MAP_ASSETS_DIR)/sprites/light.json --file $(MAP_ASSETS_DIR)/sprites/light.png --file $(MAP_ASSETS_DIR)/sprites/dark.json --file $(MAP_ASSETS_DIR)/sprites/dark.png --glob "$(MAP_ASSETS_DIR)/fonts::**/*.pbf" --glob "$(MAP_ASSETS_DIR)/twemoji::*.png" --glob "$(MAP_ASSETS_DIR)/poi-icons::**/*"
	@touch $@
# ── Transforms ────────────────────────────────────────────────────────────────
# Filtered POIs: raw POIs combined with NaPTAN, grocery retail points, GIAS,
# Ofsted and the England boundary.
$(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(GIAS) $(OFSTED) $(ENGLAND_BOUNDARY) pipeline/transform/transform_poi.py pipeline/utils/england_geometry.py
	uv run python -m pipeline.transform.transform_poi --input $(POIS_RAW) --naptan $(NAPTAN) --boundary $(ENGLAND_BOUNDARY) --grocery-retail-points $(GROCERY_RETAIL_POINTS) --gias $(GIAS) --ofsted $(OFSTED) --output $@

# Join of the EPC archive with price-paid data.
$(EPC_PP): $(PRICE_PAID) $(EPC) pipeline/transform/join_epc_pp.py pipeline/utils/fuzzy_join.py
	uv run python -m pipeline.transform.join_epc_pp --epc $(EPC) --price-paid $(PRICE_PAID) --output $@

# Grouped target (&:): one run of the recipe produces both crime tables.
# The raw crime download is validated before the transform runs.
$(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(LSOA_LOOKUP) pipeline/transform/crime.py
	$(VALIDATE_OUTPUTS) --file $(CRIME_DIR)/archive_manifest.json --glob "$(CRIME_DIR)::**/*-street.csv"
	uv run python -m pipeline.transform.crime --input $(CRIME_DIR) --output $(CRIME) --output-by-year $(CRIME_BY_YEAR) --lsoa-lookup $(LSOA_LOOKUP)

# LSOA 2011-to-2021 lookup; validated straight after download.
$(LSOA_LOOKUP): pipeline/download/lsoa_2011_to_2021.py
	uv run python -m pipeline.download.lsoa_2011_to_2021 --output $@
	$(VALIDATE_OUTPUTS) --parquet $@

$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) $(POI_PROXIMITY_DEPS)
	uv run python -m pipeline.transform.poi_proximity --arcgis $(ARCGIS) --pois $(POIS_FILTERED) --greenspace $(OS_GREENSPACE) --output $@

$(SCHOOL_PROX): $(OFSTED) $(ARCGIS) pipeline/transform/school_proximity.py pipeline/utils/poi_counts.py
	uv run python -m pipeline.transform.school_proximity --ofsted $(OFSTED) --arcgis $(ARCGIS) --output $@
# Tree density tables. A single run writes three parquets (by postcode, by
# street, by address), so all three are declared as one grouped target (&:):
# make then knows where each file comes from, runs the recipe once even under
# -j, and rebuilds if any of the three is missing or out of date.
$(TREE_DENSITY_PC) $(TREE_DENSITY_STREETS) $(TREE_DENSITY_ADDR) &: $(FR_TOW) $(ARCGIS) $(PRICE_PAID) $(TREE_DENSITY_DEPS)
	uv run python -m pipeline.transform.tree_density \
	    --tow-zip $(FR_TOW) \
	    --arcgis $(ARCGIS) \
	    --price-paid $(PRICE_PAID) \
	    --output-postcodes $(TREE_DENSITY_PC) \
	    --output-streets $(TREE_DENSITY_STREETS) \
	    --output-addresses $(TREE_DENSITY_ADDR)
# Fallback for the bare postcode boundaries directory target: if the directory
# is requested directly and does not exist, print the generation command and
# fail. The generate-postcode-boundaries target runs this same command itself.
$(PC_BOUNDARIES):
	@echo ""
	@echo "=== Postcode boundaries not found ==="
	@echo "The postcode boundaries directory is required: $@"
	@echo ""
	@echo "Generate it with:"
	@echo " uv run python -m pipeline.transform.postcode_boundaries \\"
	@echo " --uprn $(UPRN_LOOKUP) \\"
	@echo " --oa-boundaries $(OA_BOUNDARIES) \\"
	@echo " --inspire $(INSPIRE_DIR) \\"
	@echo " --output $@"
	@echo ""
	@exit 1
# ── Final merge → postcode.parquet + properties.parquet ──────────────────────
# The merge writes two parquets, so completion is tracked through a stamp file:
# the stamp is removed first and touched only after both outputs validate.
$(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
    $(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) $(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) $(MERGE_DEPS)
	@rm -f $@
	uv run python -m pipeline.transform.merge \
	    --epc-pp $(EPC_PP) \
	    --arcgis $(ARCGIS) \
	    --iod $(IOD) \
	    --poi-proximity $(POI_PROXIMITY) \
	    --ethnicity $(ETHNICITY) \
	    --crime $(CRIME) \
	    --noise $(NOISE) \
	    --school-proximity $(SCHOOL_PROX) \
	    --broadband $(BROADBAND) \
	    --conservation-areas $(CONSERVATION_AREAS) \
	    --listed-buildings $(LISTED_BUILDINGS) \
	    --rental-prices $(RENTAL) \
	    --lsoa-population $(LSOA_POP) \
	    --median-age $(MEDIAN_AGE) \
	    --election-results $(ELECTION) \
	    --tree-density-postcodes $(TREE_DENSITY_PC) \
	    --output-postcodes $(POSTCODES_PQ) \
	    --output-properties $(PROPERTIES_PQ)
	$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ)
	@touch $@
# ── Price estimation (post-merge) ───────────────────────────────────────────
# The two parquets are written by the $(MERGE_STAMP) recipe. This grouped rule
# only ties them to the stamp and re-validates them; it creates nothing itself.
$(POSTCODES_PQ) $(PROPERTIES_PQ) &: $(MERGE_STAMP)
	$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ)

# Price index. The parquets are order-only prerequisites: they must exist, but
# rebuilds are triggered by the merge stamp and the listed scripts.
$(PRICE_INDEX): $(MERGE_STAMP) $(PRICE_INDEX_DEPS) | $(PROPERTIES_PQ) $(POSTCODES_PQ)
	uv run python -m pipeline.transform.price_estimation.index --input $(PROPERTIES_PQ) --postcodes $(POSTCODES_PQ) --output $@
	$(VALIDATE_OUTPUTS) --parquet $@

# Price estimation takes the two parquets and the index as inputs and has no
# separate output argument, so completion is tracked through a stamp that is
# removed first and touched only after all three files validate.
$(PRICES_STAMP): $(MERGE_STAMP) $(PRICE_INDEX) $(PRICE_ESTIMATE_DEPS) | $(PROPERTIES_PQ) $(POSTCODES_PQ)
	@rm -f $@
	uv run python -m pipeline.transform.price_estimation.estimate --properties $(PROPERTIES_PQ) --postcodes $(POSTCODES_PQ) --index $(PRICE_INDEX)
	$(VALIDATE_OUTPUTS) --parquet $(PROPERTIES_PQ) --parquet $(POSTCODES_PQ) --parquet $(PRICE_INDEX)
	@touch $@
# Enriched listings: runs the merge module with the same data inputs as the
# main merge, plus the raw listings and the EPC archive, and writes only the
# listings parquet, which is validated afterwards.
$(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(EPC) \
    $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
    $(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) \
    $(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) \
    $(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) \
    $(MERGE_DEPS) pipeline/utils/fuzzy_join.py
	uv run python -m pipeline.transform.merge \
	    --epc-pp $(EPC_PP) \
	    --arcgis $(ARCGIS) \
	    --iod $(IOD) \
	    --poi-proximity $(POI_PROXIMITY) \
	    --ethnicity $(ETHNICITY) \
	    --crime $(CRIME) \
	    --noise $(NOISE) \
	    --school-proximity $(SCHOOL_PROX) \
	    --broadband $(BROADBAND) \
	    --conservation-areas $(CONSERVATION_AREAS) \
	    --listed-buildings $(LISTED_BUILDINGS) \
	    --rental-prices $(RENTAL) \
	    --lsoa-population $(LSOA_POP) \
	    --median-age $(MEDIAN_AGE) \
	    --election-results $(ELECTION) \
	    --tree-density-postcodes $(TREE_DENSITY_PC) \
	    --actual-listings $(ACTUAL_LISTINGS_RAW) \
	    --epc $(EPC) \
	    --output-listings $@
	$(VALIDATE_OUTPUTS) --parquet $@