From c6f869e95fad395ff7c7fe61fff2c853b2b4bcdf Mon Sep 17 00:00:00 2001
From: Andras Schmelczer
Date: Mon, 2 Feb 2026 22:28:51 +0000
Subject: [PATCH] Add tax bands and update journey times

* Add the COUNCIL_TAX_OUTPUT and COUNCIL_TAX_BANDS_OUTPUT vars and the
  download:council-tax and download:council-tax-bands tasks.
* Point JOURNEY_TIMES at {{.DATA_DIR}}/journey_times.parquet.
* download:journey-times now takes its destination from CLI_ARGS, passes
  --output-dir and --postcodes to pipeline.journey_times, and fails early
  when no destination is given.
* prepare depends on download:council-tax and passes both council tax
  files to pipeline.transform.merge; the merge command is folded over
  several lines for readability.
---
Review notes (this section is ignored by git am):
* NOTE(review): download:journey-times checks for
  journey_times_<destination>.parquet, while prepare passes
  {{.JOURNEY_TIMES}} (journey_times.parquet) to the merge step. Confirm
  which step produces journey_times.parquet.
* NOTE(review): download:council-tax-bands reads {{.WIDE_OUTPUT}}, which
  prepare produces, while prepare passes {{.COUNCIL_TAX_BANDS_OUTPUT}} to
  the merge step without depending on download:council-tax-bands.
  Confirm the intended run order on a fresh checkout.

 Taskfile.data.yml | 48 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/Taskfile.data.yml b/Taskfile.data.yml
index 5907545..4e6bd4c 100644
--- a/Taskfile.data.yml
+++ b/Taskfile.data.yml
@@ -11,7 +11,7 @@ vars:
   EPC_PP_OUTPUT: "{{.DATA_DIR}}/epc_pp.parquet"
   WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
   EPC: "{{.DATA_DIR}}/certificates.csv"
-  JOURNEY_TIMES: "./data_sources/processed/journey_times_bank_checkpoint.parquet"
+  JOURNEY_TIMES: "{{.DATA_DIR}}/journey_times.parquet"
   ETHNICITY_OUTPUT: "{{.DATA_DIR}}/ethnicity_by_la.parquet"
   CRIME_DIR: "{{.DATA_DIR}}/crime"
   CRIME_OUTPUT: "{{.DATA_DIR}}/crime_by_lsoa.parquet"
@@ -20,6 +20,8 @@ vars:
   NAPTAN_OUTPUT: "{{.DATA_DIR}}/naptan.parquet"
   BROADBAND_OUTPUT: "{{.DATA_DIR}}/broadband.parquet"
   SCHOOL_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/school_proximity.parquet"
+  COUNCIL_TAX_OUTPUT: "{{.DATA_DIR}}/council_tax.parquet"
+  COUNCIL_TAX_BANDS_OUTPUT: "{{.DATA_DIR}}/council_tax_bands.parquet"
 
 tasks:
   prompt:epc:
@@ -107,6 +109,20 @@ tasks:
     cmds:
       - uv run python -m pipeline.download.broadband --output {{.BROADBAND_OUTPUT}}
 
+  download:council-tax:
+    desc: Download council tax rates by local authority (GOV.UK Table 9)
+    status:
+      - test -f {{.COUNCIL_TAX_OUTPUT}}
+    cmds:
+      - uv run python -m pipeline.download.council_tax --output {{.COUNCIL_TAX_OUTPUT}}
+
+  download:council-tax-bands:
+    desc: Scrape individual property council tax bands from VOA
+    status:
+      - test -f {{.COUNCIL_TAX_BANDS_OUTPUT}}
+    cmds:
+      - uv run python -m pipeline.download.council_tax_bands --postcodes {{.WIDE_OUTPUT}} --output {{.COUNCIL_TAX_BANDS_OUTPUT}}
+
   download:noise:
     desc: Download Defra noise data (road, rail, airport) sampled at postcode centroids
     deps:
@@ -164,13 +180,20 @@ tasks:
       - uv run python -m pipeline.transform.school_proximity --ofsted {{.OFSTED_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --output {{.SCHOOL_PROXIMITY_OUTPUT}}
 
   download:journey-times:
-    desc: Fetch TfL journey times for all postcodes
+    desc: "Fetch TfL journey times: task download:journey-times -- <destination>"
     deps:
       - download:arcgis
+    requires:
+      vars: [CLI_ARGS]
+    # `requires` passes for empty-string vars, so reject an empty
+    # CLI_ARGS explicitly before status/cmds interpolate it.
+    preconditions:
+      - sh: test -n "{{.CLI_ARGS}}"
+        msg: "Destination required: task download:journey-times -- <destination>"
     status:
-      - test -f {{.JOURNEY_TIMES}}
+      - test -f {{.DATA_DIR}}/journey_times_{{.CLI_ARGS}}.parquet
     cmds:
-      - uv run python -m pipeline.journey_times
+      - uv run python -m pipeline.journey_times --destination {{.CLI_ARGS}} --output-dir {{.DATA_DIR}} --postcodes {{.ARCGIS_OUTPUT}}
 
   prepare:
     desc: Build wide property dataframe with all joins
@@ -181,6 +204,7 @@ tasks:
       - download:ethnicity
       - download:broadband
       - download:noise
+      - download:council-tax
       - transform:crime
       - transform:poi-proximity
       - transform:school-proximity
@@ -188,4 +212,18 @@ tasks:
     status:
       - test -f {{.WIDE_OUTPUT}}
     cmds:
-      - uv run python -m pipeline.transform.merge --epc-pp {{.EPC_PP_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --iod {{.IOD_OUTPUT}} --poi-proximity {{.POI_PROXIMITY_OUTPUT}} --journey-times {{.JOURNEY_TIMES}} --ethnicity {{.ETHNICITY_OUTPUT}} --crime {{.CRIME_OUTPUT}} --noise {{.NOISE_OUTPUT}} --school-proximity {{.SCHOOL_PROXIMITY_OUTPUT}} --broadband {{.BROADBAND_OUTPUT}} --output {{.WIDE_OUTPUT}}
+      - >-
+        uv run python -m pipeline.transform.merge
+        --epc-pp {{.EPC_PP_OUTPUT}}
+        --arcgis {{.ARCGIS_OUTPUT}}
+        --iod {{.IOD_OUTPUT}}
+        --poi-proximity {{.POI_PROXIMITY_OUTPUT}}
+        --journey-times {{.JOURNEY_TIMES}}
+        --ethnicity {{.ETHNICITY_OUTPUT}}
+        --crime {{.CRIME_OUTPUT}}
+        --noise {{.NOISE_OUTPUT}}
+        --school-proximity {{.SCHOOL_PROXIMITY_OUTPUT}}
+        --broadband {{.BROADBAND_OUTPUT}}
+        --council-tax {{.COUNCIL_TAX_OUTPUT}}
+        --council-tax-bands {{.COUNCIL_TAX_BANDS_OUTPUT}}
+        --output {{.WIDE_OUTPUT}}