Move transform logic around
This commit is contained in:
parent
e1b38a1b95
commit
38b0cf1ea1
14 changed files with 1073 additions and 336 deletions
155
Taskfile.yml
155
Taskfile.yml
|
|
@ -1,38 +1,136 @@
|
|||
version: '3'
|
||||
|
||||
# Shared file locations used by the tasks below.
vars:
  # Base directory; every other path is derived from it, so it comes first.
  DATA_DIR: data

  # Outputs of the download:* tasks
  ARCGIS_OUTPUT: '{{.DATA_DIR}}/arcgis_data.parquet'
  PRICE_PAID_OUTPUT: '{{.DATA_DIR}}/price-paid-complete.parquet'
  IOD_OUTPUT: '{{.DATA_DIR}}/IoD2025_Scores.parquet'
  POIS_RAW_OUTPUT: '{{.DATA_DIR}}/uk_pois.parquet'

  # Outputs of the transform:* tasks
  POIS_FILTERED_OUTPUT: '{{.DATA_DIR}}/filtered_uk_pois.parquet'
  POI_PROXIMITY_OUTPUT: '{{.DATA_DIR}}/poi_proximity.parquet'
  EPC_PP_OUTPUT: '{{.DATA_DIR}}/epc_pp.parquet'
  WIDE_OUTPUT: '{{.DATA_DIR}}/wide.parquet'

  # Inputs that no task in this section generates
  EPC_CSV: '{{.DATA_DIR}}/epc/certificates.csv'
  JOURNEY_TIMES: '{{.DATA_DIR}}/journey_times_bank_checkpoint.parquet'
|
||||
|
||||
tasks:
|
||||
install:
|
||||
desc: Install dependencies, generate client, and download data
|
||||
desc: Install dependencies
|
||||
cmds:
|
||||
- uv sync
|
||||
- cd frontend && npm install
|
||||
|
||||
download:
|
||||
desc: Download data
|
||||
deps:
|
||||
- install
|
||||
download:arcgis:
|
||||
desc: Download and convert ArcGIS postcode data
|
||||
sources:
|
||||
- pipeline/download/arcgis.py
|
||||
generates:
|
||||
- "{{.ARCGIS_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run -m pipeline.download.arcgis
|
||||
- uv run -m pipeline.download.pois
|
||||
- uv run -m pipeline.download.deprivation_data
|
||||
- uv run -m pipeline.download.price_paid
|
||||
- uv run python -m pipeline.download.arcgis --output {{.ARCGIS_OUTPUT}}
|
||||
|
||||
pipeline:
|
||||
desc: Run data processing pipeline
|
||||
deps:
|
||||
- download
|
||||
download:price-paid:
|
||||
desc: Download and convert Land Registry price-paid data
|
||||
sources:
|
||||
- pipeline/download/price_paid.py
|
||||
generates:
|
||||
- "{{.PRICE_PAID_OUTPUT}}"
|
||||
cmds:
|
||||
- uv run python -m pipeline.run
|
||||
- uv run python -m pipeline.download.price_paid --output {{.PRICE_PAID_OUTPUT}}
|
||||
|
||||
download:deprivation:
  desc: Download and convert Index of Deprivation data
  # Output file written by the downloader.
  generates:
    - '{{.IOD_OUTPUT}}'
  # Fingerprinted by Task for its up-to-date check.
  sources:
    - pipeline/download/deprivation_data.py
  cmds:
    - >-
      uv run python -m pipeline.download.deprivation_data
      --output {{.IOD_OUTPUT}}
|
||||
|
||||
download:pois:
  desc: Download and extract POIs from OpenStreetMap
  # Output file written by the downloader.
  generates:
    - '{{.POIS_RAW_OUTPUT}}'
  # Fingerprinted by Task for its up-to-date check.
  sources:
    - pipeline/download/pois.py
  cmds:
    - >-
      uv run python -m pipeline.download.pois
      --output {{.POIS_RAW_OUTPUT}}
|
||||
|
||||
transform:pois:
  desc: Transform raw POIs to filtered version with friendly names
  # The raw POI file must exist before it can be filtered.
  deps:
    - download:pois
  generates:
    - '{{.POIS_FILTERED_OUTPUT}}'
  # Both the transform script and its input take part in the up-to-date check.
  sources:
    - pipeline/transform/transform_poi.py
    - '{{.POIS_RAW_OUTPUT}}'
  cmds:
    - >-
      uv run python -m pipeline.transform.transform_poi
      --input {{.POIS_RAW_OUTPUT}}
      --output {{.POIS_FILTERED_OUTPUT}}
|
||||
|
||||
transform:epc-pp:
  desc: Fuzzy join EPC and Price Paid data
  # Only the price-paid side has a download task; EPC_CSV is read as-is.
  deps:
    - download:price-paid
  generates:
    - '{{.EPC_PP_OUTPUT}}'
  # Scripts and both input files take part in the up-to-date check.
  sources:
    - pipeline/transform/join_epc_pp.py
    - pipeline/utils/fuzzy_join.py
    - '{{.PRICE_PAID_OUTPUT}}'
    - '{{.EPC_CSV}}'
  cmds:
    - >-
      uv run python -m pipeline.transform.join_epc_pp
      --epc {{.EPC_CSV}}
      --price-paid {{.PRICE_PAID_OUTPUT}}
      --output {{.EPC_PP_OUTPUT}}
|
||||
|
||||
transform:poi-proximity:
  desc: Compute POI proximity counts per postcode
  # Needs the ArcGIS postcode data and the filtered POIs.
  deps:
    - download:arcgis
    - transform:pois
  generates:
    - '{{.POI_PROXIMITY_OUTPUT}}'
  # Scripts and both input files take part in the up-to-date check.
  sources:
    - pipeline/transform/poi_proximity.py
    - pipeline/utils/poi_counts.py
    - '{{.ARCGIS_OUTPUT}}'
    - '{{.POIS_FILTERED_OUTPUT}}'
  cmds:
    - >-
      uv run python -m pipeline.transform.poi_proximity
      --arcgis {{.ARCGIS_OUTPUT}}
      --pois {{.POIS_FILTERED_OUTPUT}}
      --output {{.POI_PROXIMITY_OUTPUT}}
|
||||
|
||||
# Final pipeline stage: merges every intermediate dataset into WIDE_OUTPUT.
transform:wide:
  desc: Build wide property dataframe with all joins
  deps:
    # The EPC/price-paid join task is named `transform:epc-pp`; the previous
    # `join:epc-pp` entry referred to a task that is not defined.
    - transform:epc-pp
    - download:arcgis
    - download:deprivation
    - transform:poi-proximity
  sources:
    - pipeline/transform/merge.py
    - "{{.EPC_PP_OUTPUT}}"
    - "{{.ARCGIS_OUTPUT}}"
    - "{{.IOD_OUTPUT}}"
    - "{{.POI_PROXIMITY_OUTPUT}}"
    # Passed to the merge via --journey-times, so a change to it must
    # invalidate the wide output as well.
    - "{{.JOURNEY_TIMES}}"
  generates:
    - "{{.WIDE_OUTPUT}}"
  cmds:
    - >-
      uv run python -m pipeline.transform.merge
      --epc-pp {{.EPC_PP_OUTPUT}}
      --arcgis {{.ARCGIS_OUTPUT}}
      --iod {{.IOD_OUTPUT}}
      --poi-proximity {{.POI_PROXIMITY_OUTPUT}}
      --journey-times {{.JOURNEY_TIMES}}
      --output {{.WIDE_OUTPUT}}
|
||||
|
||||
# One-shot setup entry point.
prepare:
  desc: Prepare the application (install, download data, run pipeline)
  # Run as sequential cmds rather than deps: deps execute in parallel, and the
  # pipeline stages need the installed environment. `install` is called
  # explicitly because no download/transform task depends on it any more.
  cmds:
    - task: install
    - task: transform:wide
|
||||
|
||||
test:
  # A desc makes the task show up in `task --list`.
  desc: Run Python pipeline utility tests
  cmds:
    # test_fuzzy_join is run as a module; the other two files go through pytest.
    # Uses the same `uv run python -m` form as the pipeline tasks.
    - uv run python -m pipeline.utils.test_fuzzy_join
    - uv run pytest pipeline/utils/test_haversine.py
    - uv run pytest pipeline/utils/test_poi_counts.py
|
||||
|
||||
server:
  desc: Run Rust backend on port 8001
  # Commands execute inside server-rs/.
  dir: server-rs
  cmds:
    # WIDE_OUTPUT is relative to the Taskfile's directory, not to server-rs/,
    # so anchor it at ROOT_DIR; otherwise cargo would be handed
    # server-rs/data/wide.parquet.
    - 'cargo run --release -- "{{.ROOT_DIR}}/{{.WIDE_OUTPUT}}"'
|
||||
|
||||
frontend:
|
||||
desc: Run frontend dev server on port 3030 (proxies /api to :8001)
|
||||
|
|
@ -46,16 +144,13 @@ tasks:
|
|||
cmds:
|
||||
- npm run build
|
||||
|
||||
prod:
|
||||
desc: Run production server (serves built frontend)
|
||||
cmds:
|
||||
- uv run fastapi run server/main.py --port 8001
|
||||
|
||||
lint:
|
||||
desc: Lint all code (Python and TypeScript)
|
||||
desc: Lint all code (Python, TypeScript, and Rust)
|
||||
cmds:
|
||||
- task: lint:python
|
||||
- task: lint:frontend
|
||||
- task: lint:rust
|
||||
|
||||
lint:python:
|
||||
desc: Lint Python code with ruff
|
||||
|
|
@ -69,11 +164,19 @@ tasks:
|
|||
- npm run lint
|
||||
- npm run format:check
|
||||
|
||||
lint:rust:
  # Commands execute inside the Rust crate directory.
  dir: server-rs
  desc: Lint Rust code with clippy and check formatting
  cmds:
    # -D warnings turns every clippy warning into an error.
    - 'cargo clippy -- -D warnings'
    # --check reports formatting differences without rewriting files.
    - 'cargo fmt --check'
|
||||
|
||||
format:
|
||||
desc: Format all code (Python and TypeScript)
|
||||
desc: Format all code (Python, TypeScript, and Rust)
|
||||
cmds:
|
||||
- task: format:python
|
||||
- task: format:frontend
|
||||
- task: format:rust
|
||||
|
||||
format:python:
|
||||
desc: Format Python code with ruff
|
||||
|
|
@ -88,6 +191,12 @@ tasks:
|
|||
- npm run lint:fix
|
||||
- npm run format
|
||||
|
||||
format:rust:
  # Commands execute inside the Rust crate directory.
  dir: server-rs
  desc: Format Rust code with cargo fmt
  cmds:
    - 'cargo fmt'
|
||||
|
||||
check:
|
||||
desc: Run all checks (lint, typecheck, build)
|
||||
cmds:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue