Break up taskfile
This commit is contained in:
parent
8e615c6aad
commit
5e210e14bd
2 changed files with 211 additions and 104 deletions
191
Taskfile.data.yml
Normal file
191
Taskfile.data.yml
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
version: '3'
|
||||
|
||||
vars:
  # Root directory that every derived path below is built from.
  DATA_DIR: /bulk/property-data

  # Inputs read by tasks in this file.
  EPC: '{{.DATA_DIR}}/certificates.csv'
  CRIME_DIR: '{{.DATA_DIR}}/crime'
  # Relative path; the only dataset not located under DATA_DIR.
  JOURNEY_TIMES: './data_sources/processed/journey_times_bank_checkpoint.parquet'

  # Files written by the download:* tasks.
  ARCGIS_OUTPUT: '{{.DATA_DIR}}/arcgis_data.parquet'
  PRICE_PAID_OUTPUT: '{{.DATA_DIR}}/price-paid-complete.parquet'
  IOD_OUTPUT: '{{.DATA_DIR}}/IoD2025_Scores.parquet'
  ETHNICITY_OUTPUT: '{{.DATA_DIR}}/ethnicity_by_la.parquet'
  NAPTAN_OUTPUT: '{{.DATA_DIR}}/naptan.parquet'
  POIS_RAW_OUTPUT: '{{.DATA_DIR}}/uk_pois.parquet'
  OFSTED_OUTPUT: '{{.DATA_DIR}}/ofsted.parquet'
  BROADBAND_OUTPUT: '{{.DATA_DIR}}/broadband.parquet'
  NOISE_OUTPUT: '{{.DATA_DIR}}/road_noise.parquet'

  # Files written by the transform:* tasks.
  POIS_FILTERED_OUTPUT: '{{.DATA_DIR}}/filtered_uk_pois.parquet'
  EPC_PP_OUTPUT: '{{.DATA_DIR}}/epc_pp.parquet'
  CRIME_OUTPUT: '{{.DATA_DIR}}/crime_by_lsoa.parquet'
  POI_PROXIMITY_OUTPUT: '{{.DATA_DIR}}/poi_proximity.parquet'
  SCHOOL_PROXIMITY_OUTPUT: '{{.DATA_DIR}}/school_proximity.parquet'

  # Final merged dataset written by the prepare task.
  WIDE_OUTPUT: '{{.DATA_DIR}}/wide.parquet'
|
||||
|
||||
tasks:
|
||||
# Fails with download instructions unless the EPC certificates CSV exists.
prompt:epc:
  desc: Prompt user to download EPC dataset (requires registration)
  cmds:
    # Single multi-line shell command; the trailing `exit 1` fails the task.
    - |
      echo ""
      echo "=== EPC dataset not found ==="
      echo "The EPC certificates file is required: {{.EPC}}"
      echo ""
      echo "To obtain it, register at https://epc.opendatacommunities.org/login"
      echo ""
      exit 1
  status:
    # When this check succeeds the task counts as up to date and cmds are skipped.
    - 'test -f {{.EPC}}'
|
||||
|
||||
# Fails with setup instructions unless the journey-times checkpoint exists.
# This task only prints guidance; it never fetches anything itself, so the
# description and message point the user at `download:journey-times` instead
# of asking them to re-run this task (which would just fail again).
prompt:journey-times:
  desc: Prompt user to fetch TfL journey times if missing (requires API key registration)
  status:
    # When the checkpoint file exists the task is up to date and cmds are skipped.
    - test -f {{.JOURNEY_TIMES}}
  deps:
    - download:arcgis
  cmds:
    - |
      echo ""
      echo "=== TFL journey times not found ==="
      echo "The journey times file is required: {{.JOURNEY_TIMES}}"
      echo ""
      echo "Register for a TFL API key at https://api-portal.tfl.gov.uk/signin"
      echo "Then set the TFL_API_KEY environment variable and run: task download:journey-times"
      echo ""
      exit 1
|
||||
|
||||
# Writes ARCGIS_OUTPUT; skipped when that file already exists.
download:arcgis:
  desc: Download and convert ArcGIS postcode data
  cmds:
    - >-
      uv run python -m pipeline.download.arcgis
      --output {{.ARCGIS_OUTPUT}}
  status:
    - 'test -f {{.ARCGIS_OUTPUT}}'
|
||||
|
||||
# Writes PRICE_PAID_OUTPUT; skipped when that file already exists.
download:price-paid:
  desc: Download and convert Land Registry price-paid data
  cmds:
    - >-
      uv run python -m pipeline.download.price_paid
      --output {{.PRICE_PAID_OUTPUT}}
  status:
    - 'test -f {{.PRICE_PAID_OUTPUT}}'
|
||||
|
||||
# Writes IOD_OUTPUT; skipped when that file already exists.
download:deprivation:
  desc: Download and convert Index of Deprivation data
  cmds:
    - >-
      uv run python -m pipeline.download.deprivation_data
      --output {{.IOD_OUTPUT}}
  status:
    - 'test -f {{.IOD_OUTPUT}}'
|
||||
|
||||
# Writes ETHNICITY_OUTPUT; skipped when that file already exists.
download:ethnicity:
  desc: Download ethnicity by local authority data
  cmds:
    - >-
      uv run python -m pipeline.download.ethnicity
      --output {{.ETHNICITY_OUTPUT}}
  status:
    - 'test -f {{.ETHNICITY_OUTPUT}}'
|
||||
|
||||
# Writes NAPTAN_OUTPUT; skipped when that file already exists.
download:naptan:
  desc: Download NaPTAN station data
  cmds:
    - >-
      uv run python -m pipeline.download.naptan
      --output {{.NAPTAN_OUTPUT}}
  status:
    - 'test -f {{.NAPTAN_OUTPUT}}'
|
||||
|
||||
# Writes POIS_RAW_OUTPUT; skipped when that file already exists.
download:pois:
  desc: Download and extract POIs from OpenStreetMap
  cmds:
    - >-
      uv run python -m pipeline.download.pois
      --output {{.POIS_RAW_OUTPUT}}
  status:
    - 'test -f {{.POIS_RAW_OUTPUT}}'
|
||||
|
||||
# Writes OFSTED_OUTPUT; skipped when that file already exists.
download:ofsted:
  desc: Download Ofsted school inspection outcomes
  cmds:
    - >-
      uv run python -m pipeline.download.ofsted
      --output {{.OFSTED_OUTPUT}}
  status:
    - 'test -f {{.OFSTED_OUTPUT}}'
|
||||
|
||||
# Writes BROADBAND_OUTPUT; skipped when that file already exists.
download:broadband:
  desc: Download Ofcom broadband performance data
  cmds:
    - >-
      uv run python -m pipeline.download.broadband
      --output {{.BROADBAND_OUTPUT}}
  status:
    - 'test -f {{.BROADBAND_OUTPUT}}'
|
||||
|
||||
# Reads ARCGIS_OUTPUT (hence the dependency) and writes NOISE_OUTPUT;
# skipped when NOISE_OUTPUT already exists.
download:noise:
  desc: Download Defra noise data (road, rail, airport) sampled at postcode centroids
  status:
    - 'test -f {{.NOISE_OUTPUT}}'
  deps:
    - download:arcgis
  cmds:
    - >-
      uv run python -m pipeline.download.noise
      --arcgis {{.ARCGIS_OUTPUT}}
      --output {{.NOISE_OUTPUT}}
|
||||
|
||||
# Combines the raw POI and NaPTAN downloads into POIS_FILTERED_OUTPUT;
# skipped when that file already exists.
transform:pois:
  desc: Transform raw POIs to filtered version with friendly names
  status:
    - 'test -f {{.POIS_FILTERED_OUTPUT}}'
  deps:
    - download:pois
    - download:naptan
  cmds:
    - >-
      uv run python -m pipeline.transform.transform_poi
      --input {{.POIS_RAW_OUTPUT}}
      --naptan {{.NAPTAN_OUTPUT}}
      --output {{.POIS_FILTERED_OUTPUT}}
|
||||
|
||||
# Joins the EPC CSV with the price-paid download into EPC_PP_OUTPUT;
# skipped when that file already exists.
transform:epc-pp:
  desc: Fuzzy join EPC and Price Paid data
  status:
    - 'test -f {{.EPC_PP_OUTPUT}}'
  deps:
    - download:price-paid
    # Fails with instructions if the EPC CSV has not been downloaded yet.
    - prompt:epc
  cmds:
    - >-
      uv run python -m pipeline.transform.join_epc_pp
      --epc {{.EPC}}
      --price-paid {{.PRICE_PAID_OUTPUT}}
      --output {{.EPC_PP_OUTPUT}}
|
||||
|
||||
# Reads CRIME_DIR and writes CRIME_OUTPUT; skipped when CRIME_OUTPUT exists.
# NOTE(review): no task visible in this file populates CRIME_DIR and this
# task declares no deps; presumably the CSVs are placed there manually.
# Confirm.
transform:crime:
  desc: Transform crime CSVs into yearly averages by LSOA
  cmds:
    - >-
      uv run python -m pipeline.transform.crime
      --input {{.CRIME_DIR}}
      --output {{.CRIME_OUTPUT}}
  status:
    - 'test -f {{.CRIME_OUTPUT}}'
|
||||
|
||||
# Reads ARCGIS_OUTPUT and POIS_FILTERED_OUTPUT, writes POI_PROXIMITY_OUTPUT;
# skipped when that file already exists.
transform:poi-proximity:
  desc: Compute POI proximity counts per postcode
  status:
    - 'test -f {{.POI_PROXIMITY_OUTPUT}}'
  deps:
    - download:arcgis
    - transform:pois
  cmds:
    - >-
      uv run python -m pipeline.transform.poi_proximity
      --arcgis {{.ARCGIS_OUTPUT}}
      --pois {{.POIS_FILTERED_OUTPUT}}
      --output {{.POI_PROXIMITY_OUTPUT}}
|
||||
|
||||
# Reads OFSTED_OUTPUT and ARCGIS_OUTPUT, writes SCHOOL_PROXIMITY_OUTPUT;
# skipped when that file already exists.
transform:school-proximity:
  desc: Compute good+ school proximity counts per postcode
  status:
    - 'test -f {{.SCHOOL_PROXIMITY_OUTPUT}}'
  deps:
    - download:ofsted
    - download:arcgis
  cmds:
    - >-
      uv run python -m pipeline.transform.school_proximity
      --ofsted {{.OFSTED_OUTPUT}}
      --arcgis {{.ARCGIS_OUTPUT}}
      --output {{.SCHOOL_PROXIMITY_OUTPUT}}
|
||||
|
||||
# Runs the journey-times module; skipped when JOURNEY_TIMES already exists.
# NOTE(review): unlike the other download tasks, no --output (or --arcgis)
# argument is passed, so the status check presumably relies on the module
# writing to the JOURNEY_TIMES path by itself. Confirm.
download:journey-times:
  desc: Fetch TfL journey times for all postcodes
  status:
    - 'test -f {{.JOURNEY_TIMES}}'
  deps:
    - download:arcgis
  cmds:
    - 'uv run python -m pipeline.journey_times'
|
||||
|
||||
# Final step of the pipeline: merges every intermediate dataset into
# WIDE_OUTPUT. Skipped when WIDE_OUTPUT already exists.
prepare:
  desc: Build wide property dataframe with all joins
  status:
    - 'test -f {{.WIDE_OUTPUT}}'
  deps:
    - transform:epc-pp
    - download:arcgis
    - download:deprivation
    - download:ethnicity
    - download:broadband
    - download:noise
    - transform:crime
    - transform:poi-proximity
    - transform:school-proximity
    # Only checks for the journey-times file; it does not fetch it.
    - prompt:journey-times
  cmds:
    # Folded scalar: the lines below are joined with single spaces into one
    # command line.
    - >-
      uv run python -m pipeline.transform.merge
      --epc-pp {{.EPC_PP_OUTPUT}}
      --arcgis {{.ARCGIS_OUTPUT}}
      --iod {{.IOD_OUTPUT}}
      --poi-proximity {{.POI_PROXIMITY_OUTPUT}}
      --journey-times {{.JOURNEY_TIMES}}
      --ethnicity {{.ETHNICITY_OUTPUT}}
      --crime {{.CRIME_OUTPUT}}
      --noise {{.NOISE_OUTPUT}}
      --school-proximity {{.SCHOOL_PROXIMITY_OUTPUT}}
      --broadband {{.BROADBAND_OUTPUT}}
      --output {{.WIDE_OUTPUT}}
|
||||
124
Taskfile.yml
124
Taskfile.yml
|
|
@ -1,17 +1,14 @@
|
|||
version: '3'
|
||||
|
||||
# Pull in the data-pipeline tasks from a separate Taskfile.
includes:
  data:
    # flatten: the included tasks keep their own names (no `data:` prefix).
    flatten: true
    taskfile: ./Taskfile.data.yml
|
||||
|
||||
# Each variable is defined exactly once. The source listed DATA_DIR and
# POIS_FILTERED_OUTPUT twice; duplicate mapping keys are invalid YAML and
# parsers that tolerate them keep only the last value, so the last value
# (/bulk/property-data) is the one retained here.
vars:
  DATA_DIR: /bulk/property-data
  ARCGIS_OUTPUT: "{{.DATA_DIR}}/arcgis_data.parquet"
  PRICE_PAID_OUTPUT: "{{.DATA_DIR}}/price-paid-complete.parquet"
  IOD_OUTPUT: "{{.DATA_DIR}}/IoD2025_Scores.parquet"
  POIS_RAW_OUTPUT: "{{.DATA_DIR}}/uk_pois.parquet"
  POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
  POI_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/poi_proximity.parquet"
  EPC_PP_OUTPUT: "{{.DATA_DIR}}/epc_pp.parquet"
  WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
  EPC_CSV: "{{.DATA_DIR}}/epc/certificates.csv"
  # NOTE(review): Taskfile.data.yml defines JOURNEY_TIMES with a different
  # path (./data_sources/processed/...). Confirm which definition should win.
  JOURNEY_TIMES: "{{.DATA_DIR}}/journey_times_bank_checkpoint.parquet"
|
||||
|
||||
tasks:
|
||||
install:
|
||||
|
|
@ -20,94 +17,6 @@ tasks:
|
|||
- uv sync
|
||||
- cd frontend && npm install
|
||||
|
||||
download:arcgis:
|
||||
internal: true
|
||||
desc: Download and convert ArcGIS postcode data
|
||||
generates:
|
||||
- "{{.ARCGIS_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.download.arcgis --output {{.ARCGIS_OUTPUT}}
|
||||
|
||||
download:price-paid:
|
||||
internal: true
|
||||
desc: Download and convert Land Registry price-paid data
|
||||
generates:
|
||||
- "{{.PRICE_PAID_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.download.price_paid --output {{.PRICE_PAID_OUTPUT}}
|
||||
|
||||
download:deprivation:
|
||||
internal: true
|
||||
desc: Download and convert Index of Deprivation data
|
||||
generates:
|
||||
- "{{.IOD_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.download.deprivation_data --output {{.IOD_OUTPUT}}
|
||||
|
||||
download:pois:
|
||||
internal: true
|
||||
desc: Download and extract POIs from OpenStreetMap
|
||||
generates:
|
||||
- "{{.POIS_RAW_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.download.pois --output {{.POIS_RAW_OUTPUT}}
|
||||
|
||||
transform:pois:
|
||||
internal: true
|
||||
desc: Transform raw POIs to filtered version with friendly names
|
||||
deps:
|
||||
- download:pois
|
||||
sources:
|
||||
- "{{.POIS_RAW_OUTPUT}}"
|
||||
generates:
|
||||
- "{{.POIS_FILTERED_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.transform.transform_poi --input {{.POIS_RAW_OUTPUT}} --output {{.POIS_FILTERED_OUTPUT}}
|
||||
|
||||
transform:epc-pp:
|
||||
internal: true
|
||||
desc: Fuzzy join EPC and Price Paid data
|
||||
deps:
|
||||
- download:price-paid
|
||||
sources:
|
||||
- "{{.PRICE_PAID_OUTPUT}}"
|
||||
- "{{.EPC_CSV}}"
|
||||
generates:
|
||||
- "{{.EPC_PP_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.transform.join_epc_pp --epc {{.EPC_CSV}} --price-paid {{.PRICE_PAID_OUTPUT}} --output {{.EPC_PP_OUTPUT}}
|
||||
|
||||
transform:poi-proximity:
|
||||
internal: true
|
||||
desc: Compute POI proximity counts per postcode
|
||||
deps:
|
||||
- download:arcgis
|
||||
- transform:pois
|
||||
sources:
|
||||
- "{{.ARCGIS_OUTPUT}}"
|
||||
- "{{.POIS_FILTERED_OUTPUT}}"
|
||||
generates:
|
||||
- "{{.POI_PROXIMITY_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.transform.poi_proximity --arcgis {{.ARCGIS_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}} --output {{.POI_PROXIMITY_OUTPUT}}
|
||||
|
||||
prepare:
|
||||
desc: Build wide property dataframe with all joins
|
||||
deps:
|
||||
- join:epc-pp
|
||||
- download:arcgis
|
||||
- download:deprivation
|
||||
- transform:poi-proximity
|
||||
sources:
|
||||
- "{{.EPC_PP_OUTPUT}}"
|
||||
- "{{.ARCGIS_OUTPUT}}"
|
||||
- "{{.IOD_OUTPUT}}"
|
||||
- "{{.POI_PROXIMITY_OUTPUT}}"
|
||||
generates:
|
||||
- "{{.WIDE_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.transform.merge --epc-pp {{.EPC_PP_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --iod {{.IOD_OUTPUT}} --poi-proximity {{.POI_PROXIMITY_OUTPUT}} --journey-times {{.JOURNEY_TIMES}} --output {{.WIDE_OUTPUT}}
|
||||
|
||||
test:
|
||||
cmds:
|
||||
- uv run -m pipeline.utils.test_fuzzy_join
|
||||
|
|
@ -115,10 +24,16 @@ tasks:
|
|||
- uv run pytest pipeline/utils/test_poi_counts.py
|
||||
|
||||
# Debug-profile run of the Rust backend from server-rs. The stale
# `cargo run --release -- {{.WIDE_OUTPUT}}` invocation and the duplicate
# `desc` key are dropped so the task matches its description and uses the
# same --data/--pois flags as dev:server:release.
dev:server:
  desc: Run Rust backend on port 8001 (debug build, fast compile)
  dir: server-rs
  cmds:
    - cargo run -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
|
||||
|
||||
# Same invocation as dev:server but with cargo's --release profile.
dev:server:release:
  desc: Run Rust backend on port 8001 (release build)
  cmds:
    - >-
      cargo run --release --
      --data {{.WIDE_OUTPUT}}
      --pois {{.POIS_FILTERED_OUTPUT}}
  dir: server-rs
|
||||
|
||||
dev:frontend:
|
||||
desc: Run frontend dev server on port 3030 (proxies /api to :8001)
|
||||
|
|
@ -128,7 +43,7 @@ tasks:
|
|||
|
||||
# Release build of the Rust backend. `dir` was listed twice (frontend and
# server-rs); only server-rs is kept, matching the other cargo tasks in this
# file, which all run from server-rs.
build:server:
  desc: Build server for production
  dir: server-rs
  cmds:
    - cargo build --release
|
||||
|
||||
|
|
@ -147,9 +62,10 @@ tasks:
|
|||
- task: lint:rust
|
||||
|
||||
# Runs ruff and then deptry. `desc` was listed twice; the one that mentions
# both tools is kept so the description matches the commands.
lint:python:
  desc: Lint Python code with ruff and check for unused dependencies
  cmds:
    - uv run ruff check .
    - uv run deptry .
|
||||
|
||||
lint:frontend:
|
||||
desc: Lint frontend TypeScript code
|
||||
|
|
@ -159,11 +75,12 @@ tasks:
|
|||
- npm run format:check
|
||||
|
||||
# Runs clippy (warnings are errors), a formatting check, and cargo machete
# from server-rs. `desc` was listed twice; the one that covers all three
# commands is kept.
lint:rust:
  desc: Lint Rust code with clippy, check formatting, and detect unused dependencies
  dir: server-rs
  cmds:
    - cargo clippy -- -D warnings
    - cargo fmt --check
    - cargo machete
|
||||
|
||||
format:
|
||||
desc: Format all code (Python, TypeScript, and Rust)
|
||||
|
|
@ -198,4 +115,3 @@ tasks:
|
|||
- task: build:server
|
||||
- task: build:frontend
|
||||
- task: test
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue