Fix data pipelines once and for all
This commit is contained in:
parent
08560476c5
commit
4012e4e047
46 changed files with 4508 additions and 855 deletions
|
|
@ -48,7 +48,8 @@ NAPTAN := $(DATA_DIR)/naptan.parquet
|
|||
BROADBAND := $(DATA_DIR)/broadband.parquet
|
||||
CONSERVATION_AREAS := $(DATA_DIR)/conservation_areas.geojson
|
||||
LISTED_BUILDINGS := $(DATA_DIR)/listed_buildings.gpkg
|
||||
SCHOOL_PROX := $(DATA_DIR)/school_proximity.parquet
|
||||
SCHOOL_CATCH := $(DATA_DIR)/school_catchments.parquet
|
||||
LSOA_CHILDREN := $(DATA_DIR)/lsoa_children.parquet
|
||||
RENTAL := $(DATA_DIR)/rental_prices.parquet
|
||||
INSPIRE_DIR := $(DATA_DIR)/inspire
|
||||
OA_BOUNDARIES := $(DATA_DIR)/oa_boundaries.gpkg
|
||||
|
|
@ -100,19 +101,19 @@ PC_BOUNDARIES_DEPS := pipeline/transform/postcode_boundaries/__main__.py \
|
|||
pipeline/transform/postcode_boundaries/voronoi.py
|
||||
CRIME_DOWNLOAD_DEPS := pipeline/download/crime.py
|
||||
INSPIRE_DOWNLOAD_DEPS := pipeline/download/inspire.py
|
||||
TRANSIT_DOWNLOAD_DEPS := pipeline/download/transit_network.py pipeline/download/transxchange2gtfs_shim.js
|
||||
TRANSIT_DOWNLOAD_DEPS := pipeline/download/transit_network.py
|
||||
MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_poi.py
|
||||
|
||||
# ── Phony aliases ─────────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: prepare merge tiles satellite-tiles satellite-highres-tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles property-border-tiles \
|
||||
download-arcgis download-price-paid download-deprivation download-ethnicity \
|
||||
download-naptan download-pois download-grocery-retail-points download-ofsted download-gias download-broadband download-conservation-areas download-listed-buildings download-rental-prices \
|
||||
download-naptan download-pois download-grocery-retail-points download-ofsted download-gias download-lsoa-children download-broadband download-conservation-areas download-listed-buildings download-rental-prices \
|
||||
download-postcodes download-noise download-inspire download-crime \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-fr-tow download-nfi download-ofs-register download-places download-median-age download-england-boundary download-rightmove-outcodes \
|
||||
download-map-assets \
|
||||
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
|
||||
transform-school-proximity transform-tree-density \
|
||||
transform-school-catchments transform-tree-density \
|
||||
generate-postcode-boundaries generate-travel-times enrich-actual-listings
|
||||
|
||||
# Aggregate target: builds the price stamp, the tile sets, the postcode
# boundaries, the map assets and the travel times.
# Prerequisites after `|` are order-only: they must exist, but a newer
# timestamp on them does not by itself trigger a rebuild.
# Each prerequisite is listed exactly once (property-border-tiles was duplicated).
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles property-border-tiles tree-overlay-tiles crime-hotspot-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
|
||||
|
|
@ -139,6 +140,7 @@ download-pois: $(POIS_RAW)
|
|||
download-grocery-retail-points: $(GROCERY_RETAIL_POINTS)
|
||||
download-ofsted: $(OFSTED)
|
||||
download-gias: $(GIAS)
|
||||
download-lsoa-children: $(LSOA_CHILDREN)
|
||||
download-broadband: $(BROADBAND)
|
||||
download-conservation-areas: $(CONSERVATION_AREAS)
|
||||
download-listed-buildings: $(LISTED_BUILDINGS)
|
||||
|
|
@ -150,7 +152,7 @@ download-inspire: $(INSPIRE_STAMP)
|
|||
download-oa-boundaries: $(OA_BOUNDARIES)
|
||||
download-uprn-lookup: $(UPRN_LOOKUP)
|
||||
download-transit-network: $(TRANSIT_STAMP)
|
||||
$(VALIDATE_OUTPUTS) --file $(TRANSIT_DIR)/raw/england.osm.pbf --zip $(TRANSIT_DIR)/bods_gtfs.zip --zip $(TRANSIT_DIR)/tfl_gtfs.zip
|
||||
$(VALIDATE_OUTPUTS) --file $(TRANSIT_DIR)/raw/england.osm.pbf --zip $(TRANSIT_DIR)/bods_gtfs.zip
|
||||
download-greenspace: $(GREENSPACE)
|
||||
download-os-greenspace: $(OS_GREENSPACE)
|
||||
download-pbf: $(PBF)
|
||||
|
|
@ -168,11 +170,11 @@ transform-pois: $(POIS_FILTERED)
|
|||
transform-epc-pp: $(EPC_PP)
|
||||
transform-crime: $(CRIME)
|
||||
transform-poi-proximity: $(POI_PROXIMITY)
|
||||
transform-school-proximity: $(SCHOOL_PROX)
|
||||
transform-school-catchments: $(SCHOOL_CATCH)
|
||||
transform-tree-density: $(TREE_DENSITY_PC)
|
||||
generate-postcode-boundaries: $(PC_BOUNDARIES_STAMP)
|
||||
|
||||
$(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(ARCGIS) $(PC_BOUNDARIES_DEPS)
|
||||
$(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(ARCGIS) $(GREENSPACE) $(PC_BOUNDARIES_DEPS)
|
||||
@rm -f $@
|
||||
$(VALIDATE_OUTPUTS) --dir $(INSPIRE_DIR) --zip-glob "$(INSPIRE_DIR)::*.zip"
|
||||
uv run python -m pipeline.transform.postcode_boundaries \
|
||||
|
|
@ -180,6 +182,7 @@ $(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(ARCGI
|
|||
--arcgis $(ARCGIS) \
|
||||
--oa-boundaries $(OA_BOUNDARIES) \
|
||||
--inspire $(INSPIRE_DIR) \
|
||||
--greenspace $(GREENSPACE) \
|
||||
--output $(PC_BOUNDARIES)
|
||||
$(VALIDATE_OUTPUTS) --active-postcode-boundary-match "$(ARCGIS)::$(PC_BOUNDARIES)"
|
||||
@touch $@
|
||||
|
|
@ -273,6 +276,9 @@ $(OFSTED):
|
|||
# Produce $(GIAS) by running the pipeline.download.gias module, which is
# told to write to the target path via --output $@.
# The downloader script is a prerequisite, so editing it re-runs this rule.
$(GIAS): pipeline/download/gias.py
|
||||
uv run python -m pipeline.download.gias --output $@
|
||||
|
||||
# Produce $(LSOA_CHILDREN) by running the pipeline.download.lsoa_children
# module, which is told to write to the target path via --output $@.
# The downloader script is a prerequisite, so editing it re-runs this rule.
$(LSOA_CHILDREN): pipeline/download/lsoa_children.py
|
||||
uv run python -m pipeline.download.lsoa_children --output $@
|
||||
|
||||
$(BROADBAND):
|
||||
uv run python -m pipeline.download.broadband --output $@
|
||||
|
||||
|
|
@ -315,7 +321,7 @@ $(UPRN_LOOKUP):
|
|||
$(TRANSIT_STAMP): $(TRANSIT_DOWNLOAD_DEPS)
|
||||
@rm -f $@
|
||||
uv run python -m pipeline.download.transit_network --output $(TRANSIT_DIR)
|
||||
$(VALIDATE_OUTPUTS) --file $(TRANSIT_DIR)/raw/england.osm.pbf --zip $(TRANSIT_DIR)/bods_gtfs.zip --zip $(TRANSIT_DIR)/tfl_gtfs.zip
|
||||
$(VALIDATE_OUTPUTS) --file $(TRANSIT_DIR)/raw/england.osm.pbf --zip $(TRANSIT_DIR)/bods_gtfs.zip
|
||||
@touch $@
|
||||
|
||||
$(RENTAL): pipeline/download/rental_prices.py
|
||||
|
|
@ -364,8 +370,8 @@ $(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(PC_BOUNDARIES_STAMP) pipeline/tran
|
|||
# Produce $(POI_PROXIMITY) by running pipeline.transform.poi_proximity.
# Each data prerequisite is passed to the script explicitly:
#   --arcgis $(ARCGIS), --pois $(POIS_FILTERED), --greenspace $(OS_GREENSPACE).
# $(POI_PROXIMITY_DEPS) is only a rebuild trigger; it is not passed on the
# command line (presumably the transform's source files — defined outside this view).
$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) $(POI_PROXIMITY_DEPS)
|
||||
uv run python -m pipeline.transform.poi_proximity --arcgis $(ARCGIS) --pois $(POIS_FILTERED) --greenspace $(OS_GREENSPACE) --output $@
|
||||
|
||||
$(SCHOOL_PROX): $(OFSTED) $(ARCGIS) $(GIAS) pipeline/transform/school_proximity.py pipeline/utils/poi_counts.py
|
||||
uv run python -m pipeline.transform.school_proximity --ofsted $(OFSTED) --arcgis $(ARCGIS) --gias $(GIAS) --output $@
|
||||
# Produce $(SCHOOL_CATCH) by running pipeline.transform.school_catchments.
# Data inputs are passed explicitly: --ofsted $(OFSTED), --arcgis $(ARCGIS),
# --gias $(GIAS), --lsoa-children $(LSOA_CHILDREN); output goes to $@.
# The two .py prerequisites are rebuild triggers only: editing the transform
# script or pipeline/utils/poi_counts.py re-runs this rule.
$(SCHOOL_CATCH): $(OFSTED) $(ARCGIS) $(GIAS) $(LSOA_CHILDREN) pipeline/transform/school_catchments.py pipeline/utils/poi_counts.py
|
||||
uv run python -m pipeline.transform.school_catchments --ofsted $(OFSTED) --arcgis $(ARCGIS) --gias $(GIAS) --lsoa-children $(LSOA_CHILDREN) --output $@
|
||||
|
||||
$(TREE_DENSITY_PC): $(FR_TOW) $(NFI) $(ARCGIS) $(TREE_DENSITY_DEPS)
|
||||
uv run python -m pipeline.transform.tree_density \
|
||||
|
|
@ -386,6 +392,7 @@ $(PC_BOUNDARIES):
|
|||
@echo " --arcgis $(ARCGIS) \\"
|
||||
@echo " --oa-boundaries $(OA_BOUNDARIES) \\"
|
||||
@echo " --inspire $(INSPIRE_DIR) \\"
|
||||
@echo " --greenspace $(GREENSPACE) \\"
|
||||
@echo " --output $@"
|
||||
@echo ""
|
||||
@exit 1
|
||||
|
|
@ -393,7 +400,7 @@ $(PC_BOUNDARIES):
|
|||
# ── Final merge → postcode.parquet + properties.parquet ──────────────────────
|
||||
|
||||
$(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) $(MERGE_DEPS)
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_CATCH) $(BROADBAND) $(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) $(MERGE_DEPS)
|
||||
@rm -f $@
|
||||
uv run python -m pipeline.transform.merge \
|
||||
--epc-pp $(EPC_PP) \
|
||||
|
|
@ -403,7 +410,7 @@ $(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
|||
--ethnicity $(ETHNICITY) \
|
||||
--crime $(CRIME) \
|
||||
--noise $(NOISE) \
|
||||
--school-proximity $(SCHOOL_PROX) \
|
||||
--school-catchments $(SCHOOL_CATCH) \
|
||||
--broadband $(BROADBAND) \
|
||||
--conservation-areas $(CONSERVATION_AREAS) \
|
||||
--listed-buildings $(LISTED_BUILDINGS) \
|
||||
|
|
@ -433,7 +440,7 @@ $(PRICES_STAMP): $(MERGE_STAMP) $(PRICE_INDEX) $(PRICE_ESTIMATE_DEPS) | $(PROPER
|
|||
|
||||
$(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(EPC) \
|
||||
$(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) \
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_CATCH) $(BROADBAND) \
|
||||
$(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) \
|
||||
$(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) \
|
||||
$(MERGE_DEPS) pipeline/utils/fuzzy_join.py
|
||||
|
|
@ -445,7 +452,7 @@ $(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(EPC) \
|
|||
--ethnicity $(ETHNICITY) \
|
||||
--crime $(CRIME) \
|
||||
--noise $(NOISE) \
|
||||
--school-proximity $(SCHOOL_PROX) \
|
||||
--school-catchments $(SCHOOL_CATCH) \
|
||||
--broadband $(BROADBAND) \
|
||||
--conservation-areas $(CONSERVATION_AREAS) \
|
||||
--listed-buildings $(LISTED_BUILDINGS) \
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue