Rerun data pipelines

This commit is contained in:
Andras Schmelczer 2026-05-10 14:49:53 +01:00
parent 4c95815dc8
commit fc10381692
27 changed files with 2143 additions and 215 deletions

View file

@ -29,10 +29,11 @@ PROPERTIES_PQ := $(DATA_DIR)/properties.parquet
MERGE_STAMP := $(DATA_DIR)/.merge_done
PRICE_INDEX := $(DATA_DIR)/price_index.parquet
PRICES_STAMP := $(DATA_DIR)/.prices_done
# EPC is supplied by hand: its rule only prints instructions and exits non-zero.
# (A single assignment; an earlier duplicate pointing at certificates.csv was dead
# because it was immediately overwritten.)
EPC := $(MANUAL_DATA)/domestic-csv.zip
ETHNICITY := $(DATA_DIR)/ethnicity_by_la.parquet
# CRIME_DIR receives the crime download, CRIME_STAMP is touched once that download
# has finished, and CRIME is the parquet the transform step writes from CRIME_DIR.
CRIME_DIR := $(MANUAL_DATA)/crime
CRIME := $(DATA_DIR)/crime_by_lsoa.parquet
CRIME_STAMP := $(CRIME_DIR)/.downloaded
NOISE := $(DATA_DIR)/road_noise.parquet
OFSTED := $(DATA_DIR)/ofsted.parquet
NAPTAN := $(DATA_DIR)/naptan.parquet
@ -65,7 +66,7 @@ PMTILES_VERSION := 1.22.3
.PHONY: prepare merge tiles \
download-arcgis download-price-paid download-deprivation download-ethnicity \
download-naptan download-pois download-grocery-retail-points download-ofsted download-broadband download-rental-prices \
download-postcodes download-noise download-inspire \
download-postcodes download-noise download-inspire download-crime \
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-places download-lsoa-population download-median-age download-england-boundary download-rightmove-outcodes \
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
transform-school-proximity transform-postcode-boundaries \
@ -78,6 +79,7 @@ download-arcgis: $(ARCGIS)
# Short download-* names (declared in .PHONY) that each depend on the artefact
# variable for one dataset, so `make download-<name>` builds just that artefact.
# download-crime depends on a stamp file rather than on a data file.
download-price-paid: $(PRICE_PAID)
download-deprivation: $(IOD)
download-ethnicity: $(ETHNICITY)
download-crime: $(CRIME_STAMP)
download-naptan: $(NAPTAN)
download-pois: $(POIS_RAW)
download-grocery-retail-points: $(GROCERY_RETAIL_POINTS)
@ -121,10 +123,10 @@ $(TILES):
# This rule downloads nothing: when the EPC archive is missing it prints where to
# get it and where to put it, then fails so the build stops with a clear message.
# Only the current archive name and registration URL are shown; the message no
# longer also asks for the superseded certificates.csv file.
$(EPC):
	@echo ""
	@echo "=== EPC dataset not found ==="
	@echo "The EPC certificates archive is required: $@"
	@echo ""
	@echo "To obtain it, register at https://get-energy-performance-data.communities.gov.uk/filter-properties?property_type=domestic"
	@echo "and place domestic-csv.zip in manual-data/"
	@echo ""
	@exit 1
@ -140,6 +142,10 @@ $(IOD):
# Runs the pipeline.download.ethnicity module, passing the target path as --output.
$(ETHNICITY):
	uv run python -m pipeline.download.ethnicity \
		--output $@
# Runs the crime downloader with $(CRIME_DIR) as its output directory, then touches
# the stamp file so later rules can depend on one file with a stable timestamp.
# Because each recipe line must succeed before the next runs, the stamp is only
# written after a successful download.
$(CRIME_STAMP):
	@mkdir -p $(@D)  # make sure the stamp's directory exists before touch runs
	uv run python -m pipeline.download.crime --output $(CRIME_DIR)
	@touch $@
# Runs the pipeline.download.naptan module, passing the target path as --output.
$(NAPTAN):
	uv run python -m pipeline.download.naptan \
		--output $@
@ -216,15 +222,7 @@ $(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(ENGLAND_BOUND
# Runs pipeline.transform.join_epc_pp with the EPC input and the price-paid file,
# writing the result to the target. Rebuilt whenever either input is newer.
$(EPC_PP): $(PRICE_PAID) $(EPC)
	uv run python -m pipeline.transform.join_epc_pp \
		--epc $(EPC) \
		--price-paid $(PRICE_PAID) \
		--output $@
# Runs pipeline.transform.crime over $(CRIME_DIR) and writes the target parquet.
# The prerequisite is the download stamp, so the download rule runs first when the
# stamp is missing. This is the only recipe for the target: the earlier manual
# "directory exists" check was overridden by this rule and never ran.
$(CRIME): $(CRIME_STAMP)
	uv run python -m pipeline.transform.crime --input $(CRIME_DIR) --output $@
$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE)