This commit is contained in:
Andras Schmelczer 2026-02-15 22:39:49 +00:00
parent 03445188ea
commit 524580eb25
102 changed files with 36625 additions and 1295 deletions

View file

@ -24,8 +24,6 @@ POI_PROXIMITY := $(DATA_DIR)/poi_proximity.parquet
EPC_PP := $(DATA_DIR)/epc_pp.parquet
WIDE := $(DATA_DIR)/wide.parquet
PRICE_INDEX := $(DATA_DIR)/price_index.parquet
RENO_PREMIUM := $(DATA_DIR)/renovation_premium.parquet
HEDONIC_MODEL := $(DATA_DIR)/hedonic_model.json
PRICES_STAMP := $(DATA_DIR)/.prices_done
EPC := $(MANUAL_DATA)/certificates.csv
JT_BANK := $(MANUAL_DATA)/journey_times_bank.parquet
@ -48,6 +46,11 @@ PC_BOUNDARIES := $(MANUAL_DATA)/postcode_boundaries
TRANSIT_DIR := $(DATA_DIR)/transit
TRANSIT_STAMP := $(TRANSIT_DIR)/.done
GREENSPACE := $(DATA_DIR)/greenspace_water.parquet
# Local copy of the Geofabrik Great Britain OSM extract (fetched by the $(PBF) rule).
PBF := $(DATA_DIR)/great-britain-latest.osm.pbf
# Target written by pipeline.download.places (receives this path as --output).
PLACES := $(DATA_DIR)/places.parquet
# Rightmove parquet files handed to pipeline.transform.add_online_listings
# as --buy and --rent respectively.
RIGHTMOVE_BUY := $(DATA_DIR)/rightmove_buy.parquet
RIGHTMOVE_RENT := $(DATA_DIR)/rightmove_rent.parquet
# Stamp file touched once the online-listings step has run.
ONLINE_STAMP := $(DATA_DIR)/.online_done
# Sentinel files for directory targets (Make doesn't track directories well)
GEOSURE_STAMP := $(GEOSURE_DIR)/.done
@ -61,7 +64,7 @@ PMTILES_VERSION := 1.22.3
download-arcgis download-price-paid download-deprivation download-ethnicity \
download-naptan download-pois download-ofsted download-broadband download-rental-prices \
download-postcodes download-geosure download-noise download-inspire \
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace \
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-pbf download-places \
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
transform-school-proximity transform-geosure transform-postcode-boundaries \
generate-postcode-boundaries \
@ -87,6 +90,8 @@ download-oa-boundaries: $(OA_BOUNDARIES)
# Command-style aliases: each name has no recipe of its own and simply depends
# on the file (or stamp) target that does the real work.
# NOTE(review): these names also appear in the target list above, presumably
# under .PHONY — confirm.
download-uprn-lookup: $(UPRN_LOOKUP)
download-transit-network: $(TRANSIT_STAMP)
download-greenspace: $(GREENSPACE)
download-pbf: $(PBF)
download-places: $(PLACES)
transform-pois: $(POIS_FILTERED)
transform-epc-pp: $(EPC_PP)
transform-crime: $(CRIME)
@ -132,8 +137,13 @@ $(ETHNICITY):
# Runs pipeline.download.naptan, passing the target path as --output.
# There are no prerequisites, so Make only runs this when the file is missing.
$(NAPTAN):
uv run python -m pipeline.download.naptan --output $@
$(POIS_RAW):
uv run python -m pipeline.download.pois --output $@
# Download the Geofabrik Great Britain OSM extract.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page, so a failed request can never be renamed into place as
# the .pbf. The transfer goes to $@.tmp and is renamed only after curl
# succeeds, so an interrupted download does not leave a truncated file that
# looks up to date.
$(PBF):
	@mkdir -p $(@D)
	curl --fail -L -o $@.tmp https://download.geofabrik.de/europe/great-britain-latest.osm.pbf
	mv $@.tmp $@
# Runs pipeline.download.pois against the local OSM extract.
# $< is $(PBF), the only prerequisite of this rule.
$(POIS_RAW): $(PBF)
	uv run python -m pipeline.download.pois \
		--output $@ \
		--pbf $<
# Runs pipeline.download.ofsted, passing the target path as --output.
# There are no prerequisites, so Make only runs this when the file is missing.
$(OFSTED):
uv run python -m pipeline.download.ofsted --output $@
@ -168,8 +178,11 @@ $(TRANSIT_STAMP):
# Runs pipeline.download.rental_prices, passing the target path as --output.
# There are no prerequisites, so Make only runs this when the file is missing.
$(RENTAL):
uv run python -m pipeline.download.rental_prices --output $@
$(GREENSPACE):
uv run python -m pipeline.download.greenspace_water --output $@
# Runs pipeline.download.greenspace_water against the local OSM extract.
# $< is $(PBF), the only prerequisite of this rule.
$(GREENSPACE): $(PBF)
	uv run python -m pipeline.download.greenspace_water \
		--output $@ \
		--pbf $<
# Runs pipeline.download.places against the local OSM extract.
# $< is $(PBF), the only prerequisite of this rule.
$(PLACES): $(PBF)
	uv run python -m pipeline.download.places \
		--output $@ \
		--pbf $<
# ── Journey times (requires TFL_API_KEY) ──────────────────────────────────────
@ -260,18 +273,20 @@ $(WIDE): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) $(JT_BANK) $(JT_FITZROVIA)
--rental-prices $(RENTAL) \
--output $@
# ── Price estimation (post-merge) ────────────────────────────────────────────
# ── Online listings (post-merge, pre-pricing) ───────────────────────────────
$(PRICE_INDEX): $(WIDE)
uv run python -m pipeline.transform.price_index --input $(WIDE) --output $@
$(RENO_PREMIUM): $(WIDE) $(PRICE_INDEX)
uv run python -m pipeline.transform.renovation_premium --input $(WIDE) --index $(PRICE_INDEX) --output $@
$(HEDONIC_MODEL): $(WIDE)
uv run python -m pipeline.transform.hedonic_quality --input $(WIDE) --output $@
$(PRICES_STAMP): $(WIDE) $(PRICE_INDEX) $(RENO_PREMIUM) $(HEDONIC_MODEL)
uv run python -m pipeline.transform.price_estimate --input $(WIDE) --index $(PRICE_INDEX) \
--renovation-premium $(RENO_PREMIUM) --hedonic-model $(HEDONIC_MODEL)
# Runs pipeline.transform.add_online_listings over the wide table ($<, the
# first prerequisite) and the two Rightmove files. No --output is passed, so
# the stamp touched afterwards is what Make tracks for this step.
# NOTE(review): presumably the script updates $(WIDE) in place — confirm.
$(ONLINE_STAMP): $(WIDE) $(RIGHTMOVE_BUY) $(RIGHTMOVE_RENT)
	uv run python -m pipeline.transform.add_online_listings --input $< --buy $(RIGHTMOVE_BUY) --rent $(RIGHTMOVE_RENT)
	@touch $@
# ── Price estimation (post-merge + online) ──────────────────────────────────
# Runs pipeline.transform.price_estimation.index on $(WIDE), passing the target
# path as --output. The prerequisite is the online-listings stamp rather than
# $(WIDE) itself, so this step is ordered after the listings step.
$(PRICE_INDEX): $(ONLINE_STAMP)
	uv run python -m pipeline.transform.price_estimation.index \
		--input $(WIDE) \
		--output $@
# Runs pipeline.transform.price_estimation.estimate with $(WIDE) and the price
# index. No --output is passed, so completion is recorded by touching the stamp.
$(PRICES_STAMP): $(ONLINE_STAMP) $(PRICE_INDEX)
	uv run python -m pipeline.transform.price_estimation.estimate \
		--input $(WIDE) \
		--index $(PRICE_INDEX)
	@touch $@