Break up taskfile

This commit is contained in:
Andras Schmelczer 2026-02-01 13:05:00 +00:00
parent 8e615c6aad
commit 5e210e14bd
2 changed files with 211 additions and 104 deletions

View file

@@ -1,17 +1,14 @@
version: '3'
includes:
data:
taskfile: ./Taskfile.data.yml
flatten: true
vars:
DATA_DIR: data
ARCGIS_OUTPUT: "{{.DATA_DIR}}/arcgis_data.parquet"
PRICE_PAID_OUTPUT: "{{.DATA_DIR}}/price-paid-complete.parquet"
IOD_OUTPUT: "{{.DATA_DIR}}/IoD2025_Scores.parquet"
POIS_RAW_OUTPUT: "{{.DATA_DIR}}/uk_pois.parquet"
POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
POI_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/poi_proximity.parquet"
EPC_PP_OUTPUT: "{{.DATA_DIR}}/epc_pp.parquet"
DATA_DIR: /bulk/property-data
WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
EPC_CSV: "{{.DATA_DIR}}/epc/certificates.csv"
JOURNEY_TIMES: "{{.DATA_DIR}}/journey_times_bank_checkpoint.parquet"
POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
tasks:
install:
@@ -20,94 +17,6 @@ tasks:
- uv sync
- cd frontend && npm install
download:arcgis:
internal: true
desc: Download and convert ArcGIS postcode data
generates:
- "{{.ARCGIS_OUTPUT}}"
cmds:
- uv run python -m pipeline.download.arcgis --output {{.ARCGIS_OUTPUT}}
download:price-paid:
internal: true
desc: Download and convert Land Registry price-paid data
generates:
- "{{.PRICE_PAID_OUTPUT}}"
cmds:
- uv run python -m pipeline.download.price_paid --output {{.PRICE_PAID_OUTPUT}}
download:deprivation:
internal: true
desc: Download and convert Index of Deprivation data
generates:
- "{{.IOD_OUTPUT}}"
cmds:
- uv run python -m pipeline.download.deprivation_data --output {{.IOD_OUTPUT}}
download:pois:
internal: true
desc: Download and extract POIs from OpenStreetMap
generates:
- "{{.POIS_RAW_OUTPUT}}"
cmds:
- uv run python -m pipeline.download.pois --output {{.POIS_RAW_OUTPUT}}
transform:pois:
internal: true
desc: Transform raw POIs to filtered version with friendly names
deps:
- download:pois
sources:
- "{{.POIS_RAW_OUTPUT}}"
generates:
- "{{.POIS_FILTERED_OUTPUT}}"
cmds:
- uv run python -m pipeline.transform.transform_poi --input {{.POIS_RAW_OUTPUT}} --output {{.POIS_FILTERED_OUTPUT}}
transform:epc-pp:
internal: true
desc: Fuzzy join EPC and Price Paid data
deps:
- download:price-paid
sources:
- "{{.PRICE_PAID_OUTPUT}}"
- "{{.EPC_CSV}}"
generates:
- "{{.EPC_PP_OUTPUT}}"
cmds:
- uv run python -m pipeline.transform.join_epc_pp --epc {{.EPC_CSV}} --price-paid {{.PRICE_PAID_OUTPUT}} --output {{.EPC_PP_OUTPUT}}
transform:poi-proximity:
internal: true
desc: Compute POI proximity counts per postcode
deps:
- download:arcgis
- transform:pois
sources:
- "{{.ARCGIS_OUTPUT}}"
- "{{.POIS_FILTERED_OUTPUT}}"
generates:
- "{{.POI_PROXIMITY_OUTPUT}}"
cmds:
- uv run python -m pipeline.transform.poi_proximity --arcgis {{.ARCGIS_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}} --output {{.POI_PROXIMITY_OUTPUT}}
prepare:
desc: Build wide property dataframe with all joins
deps:
- join:epc-pp
- download:arcgis
- download:deprivation
- transform:poi-proximity
sources:
- "{{.EPC_PP_OUTPUT}}"
- "{{.ARCGIS_OUTPUT}}"
- "{{.IOD_OUTPUT}}"
- "{{.POI_PROXIMITY_OUTPUT}}"
generates:
- "{{.WIDE_OUTPUT}}"
cmds:
- uv run python -m pipeline.transform.merge --epc-pp {{.EPC_PP_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --iod {{.IOD_OUTPUT}} --poi-proximity {{.POI_PROXIMITY_OUTPUT}} --journey-times {{.JOURNEY_TIMES}} --output {{.WIDE_OUTPUT}}
test:
cmds:
- uv run -m pipeline.utils.test_fuzzy_join
@@ -115,10 +24,16 @@ tasks:
- uv run pytest pipeline/utils/test_poi_counts.py
dev:server:
desc: Run Rust backend on port 8001
desc: Run Rust backend on port 8001 (debug build, fast compile)
dir: server-rs
cmds:
- cargo run --release -- {{.WIDE_OUTPUT}}
- cargo run -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
dev:server:release:
desc: Run Rust backend on port 8001 (release build)
dir: server-rs
cmds:
- cargo run --release -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
dev:frontend:
desc: Run frontend dev server on port 3030 (proxies /api to :8001)
@@ -128,7 +43,7 @@ tasks:
build:server:
desc: Build server for production
dir: frontend
dir: server-rs
cmds:
- cargo build --release
@@ -147,9 +62,10 @@ tasks:
- task: lint:rust
lint:python:
desc: Lint Python code with ruff
desc: Lint Python code with ruff and check for unused dependencies
cmds:
- uv run ruff check .
- uv run deptry .
lint:frontend:
desc: Lint frontend TypeScript code
@@ -159,11 +75,12 @@ tasks:
- npm run format:check
lint:rust:
desc: Lint Rust code with clippy and check formatting
desc: Lint Rust code with clippy, check formatting, and detect unused dependencies
dir: server-rs
cmds:
- cargo clippy -- -D warnings
- cargo fmt --check
- cargo machete
format:
desc: Format all code (Python, TypeScript, and Rust)
@@ -198,4 +115,3 @@ tasks:
- task: build:server
- task: build:frontend
- task: test