Rerun data pipelines
This commit is contained in:
parent
4c95815dc8
commit
fc10381692
27 changed files with 2143 additions and 215 deletions
|
|
@ -29,10 +29,11 @@ PROPERTIES_PQ := $(DATA_DIR)/properties.parquet
|
|||
MERGE_STAMP := $(DATA_DIR)/.merge_done
|
||||
PRICE_INDEX := $(DATA_DIR)/price_index.parquet
|
||||
PRICES_STAMP := $(DATA_DIR)/.prices_done
|
||||
EPC := $(MANUAL_DATA)/certificates.csv
|
||||
EPC := $(MANUAL_DATA)/domestic-csv.zip
|
||||
ETHNICITY := $(DATA_DIR)/ethnicity_by_la.parquet
|
||||
CRIME_DIR := $(MANUAL_DATA)/crime
|
||||
CRIME := $(DATA_DIR)/crime_by_lsoa.parquet
|
||||
CRIME_STAMP := $(CRIME_DIR)/.downloaded
|
||||
NOISE := $(DATA_DIR)/road_noise.parquet
|
||||
OFSTED := $(DATA_DIR)/ofsted.parquet
|
||||
NAPTAN := $(DATA_DIR)/naptan.parquet
|
||||
|
|
@ -65,7 +66,7 @@ PMTILES_VERSION := 1.22.3
|
|||
.PHONY: prepare merge tiles \
|
||||
download-arcgis download-price-paid download-deprivation download-ethnicity \
|
||||
download-naptan download-pois download-grocery-retail-points download-ofsted download-broadband download-rental-prices \
|
||||
download-postcodes download-noise download-inspire \
|
||||
download-postcodes download-noise download-inspire download-crime \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-places download-lsoa-population download-median-age download-england-boundary download-rightmove-outcodes \
|
||||
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
|
||||
transform-school-proximity transform-postcode-boundaries \
|
||||
|
|
@ -78,6 +79,7 @@ download-arcgis: $(ARCGIS)
|
|||
download-price-paid: $(PRICE_PAID)
|
||||
download-deprivation: $(IOD)
|
||||
download-ethnicity: $(ETHNICITY)
|
||||
download-crime: $(CRIME_STAMP)
|
||||
download-naptan: $(NAPTAN)
|
||||
download-pois: $(POIS_RAW)
|
||||
download-grocery-retail-points: $(GROCERY_RETAIL_POINTS)
|
||||
|
|
@ -121,10 +123,10 @@ $(TILES):
|
|||
$(EPC):
|
||||
@echo ""
|
||||
@echo "=== EPC dataset not found ==="
|
||||
@echo "The EPC certificates file is required: $@"
|
||||
@echo "The EPC certificates archive is required: $@"
|
||||
@echo ""
|
||||
@echo "To obtain it, register at https://epc.opendatacommunities.org/login"
|
||||
@echo "and place certificates.csv in manual-data/"
|
||||
@echo "To obtain it, register at https://get-energy-performance-data.communities.gov.uk/filter-properties?property_type=domestic"
|
||||
@echo "and place domestic-csv.zip in manual-data/"
|
||||
@echo ""
|
||||
@exit 1
|
||||
|
||||
|
|
@ -140,6 +142,10 @@ $(IOD):
|
|||
$(ETHNICITY):
|
||||
uv run python -m pipeline.download.ethnicity --output $@
|
||||
|
||||
$(CRIME_STAMP):
|
||||
uv run python -m pipeline.download.crime --output $(CRIME_DIR)
|
||||
@touch $@
|
||||
|
||||
$(NAPTAN):
|
||||
uv run python -m pipeline.download.naptan --output $@
|
||||
|
||||
|
|
@ -216,15 +222,7 @@ $(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(ENGLAND_BOUND
|
|||
$(EPC_PP): $(PRICE_PAID) $(EPC)
|
||||
uv run python -m pipeline.transform.join_epc_pp --epc $(EPC) --price-paid $(PRICE_PAID) --output $@
|
||||
|
||||
$(CRIME):
|
||||
@if [ ! -d "$(CRIME_DIR)" ]; then \
|
||||
echo ""; \
|
||||
echo "=== Crime dataset not found ==="; \
|
||||
echo "Place police.uk crime CSVs in $(CRIME_DIR)/"; \
|
||||
echo "Download from https://data.police.uk/data/"; \
|
||||
echo ""; \
|
||||
exit 1; \
|
||||
fi
|
||||
$(CRIME): $(CRIME_STAMP)
|
||||
uv run python -m pipeline.transform.crime --input $(CRIME_DIR) --output $@
|
||||
|
||||
$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue