This commit is contained in:
Andras Schmelczer 2026-05-26 19:45:13 +01:00
parent c645b0f1d4
commit 39ef5c6646
79 changed files with 5660 additions and 2199 deletions

View file

@ -8,6 +8,7 @@ SHELL := /bin/bash
DATA_DIR := ./property-data
MANUAL_DATA := ./manual-data
FINDER_DATA := ./finder/data
# ── Output files ──────────────────────────────────────────────────────────────
@ -27,6 +28,8 @@ MERGE_STAMP := $(DATA_DIR)/.merge_done
PRICE_INDEX := $(DATA_DIR)/price_index.parquet
PRICES_STAMP := $(DATA_DIR)/.prices_done
EPC := $(MANUAL_DATA)/domestic-csv.zip
ACTUAL_LISTINGS_RAW := $(FINDER_DATA)/online_listings_buy.parquet
ACTUAL_LISTINGS_ENRICHED := $(FINDER_DATA)/online_listings_buy_enriched.parquet
ETHNICITY := $(DATA_DIR)/ethnicity_by_la.parquet
CRIME_DIR := $(DATA_DIR)/crime
CRIME := $(DATA_DIR)/crime_by_lsoa.parquet
@ -106,12 +109,13 @@ MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_
download-map-assets \
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
transform-school-proximity transform-tree-density \
generate-postcode-boundaries generate-travel-times
generate-postcode-boundaries generate-travel-times enrich-actual-listings
prepare: $(PRICES_STAMP) download-places tiles overlay-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX)
merge: $(MERGE_STAMP) | $(POSTCODES_PQ) $(PROPERTIES_PQ)
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ)
enrich-actual-listings: $(ACTUAL_LISTINGS_ENRICHED)
tiles: $(TILES)
overlay-tiles: noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles
noise-overlay-tiles: $(NOISE_OVERLAY_TILES)
@ -319,8 +323,8 @@ $(MAP_ASSETS_STAMP): $(MAP_ASSETS_DEPS)
# ── Transforms ────────────────────────────────────────────────────────────────
$(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(GIAS) $(ENGLAND_BOUNDARY) pipeline/transform/transform_poi.py pipeline/utils/england_geometry.py
uv run python -m pipeline.transform.transform_poi --input $(POIS_RAW) --naptan $(NAPTAN) --boundary $(ENGLAND_BOUNDARY) --grocery-retail-points $(GROCERY_RETAIL_POINTS) --gias $(GIAS) --output $@
$(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(GIAS) $(OFSTED) $(ENGLAND_BOUNDARY) pipeline/transform/transform_poi.py pipeline/utils/england_geometry.py
uv run python -m pipeline.transform.transform_poi --input $(POIS_RAW) --naptan $(NAPTAN) --boundary $(ENGLAND_BOUNDARY) --grocery-retail-points $(GROCERY_RETAIL_POINTS) --gias $(GIAS) --ofsted $(OFSTED) --output $@
$(EPC_PP): $(PRICE_PAID) $(EPC) pipeline/transform/join_epc_pp.py pipeline/utils/fuzzy_join.py
uv run python -m pipeline.transform.join_epc_pp --epc $(EPC) --price-paid $(PRICE_PAID) --output $@
@ -404,3 +408,13 @@ $(PRICES_STAMP): $(MERGE_STAMP) $(PRICE_INDEX) $(PRICE_ESTIMATE_DEPS) | $(PROPER
uv run python -m pipeline.transform.price_estimation.estimate --properties $(PROPERTIES_PQ) --postcodes $(POSTCODES_PQ) --index $(PRICE_INDEX)
$(VALIDATE_OUTPUTS) --parquet $(PROPERTIES_PQ) --parquet $(POSTCODES_PQ) --parquet $(PRICE_INDEX)
@touch $@
$(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(PRICES_STAMP) $(POSTCODES_PQ) $(ARCGIS) $(EPC) \
pipeline/transform/enrich_actual_listings.py pipeline/transform/join_epc_pp.py pipeline/utils/fuzzy_join.py
uv run python -m pipeline.transform.enrich_actual_listings \
--listings $(ACTUAL_LISTINGS_RAW) \
--properties $(PROPERTIES_PQ) \
--postcode-features $(POSTCODES_PQ) \
--arcgis $(ARCGIS) \
--epc $(EPC) \
--output $@