idgf
This commit is contained in:
parent
fbfebc651c
commit
aab85fe32e
33 changed files with 2016 additions and 283 deletions
|
|
@ -352,6 +352,50 @@ def _failures_for_active_postcode_boundary_match(spec: str) -> list[str]:
|
|||
return failures
|
||||
|
||||
|
||||
def _failures_for_postcode_universe(spec: str) -> list[str]:
    """Check that a postcode parquet's key set equals the active-English universe.

    Per the sibling check `_failures_for_postcode_features`, only per-row
    validity is verified there, not the row count. A truncated or stale
    postcode.parquet (for instance an interrupted merge that wrote only part of
    the ~1.49M rows, each of them individually valid) would therefore pass the
    build gate unnoticed. This check closes that gap by comparing the postcode
    set against the active-English NSPL/ArcGIS set.

    Args:
        spec: An ``ARCGIS::POSTCODES`` pair, split by ``_split_pair``.

    Returns:
        A list of human-readable failure messages; empty when the counts match
        and neither side has postcodes the other lacks.
    """
    arcgis_path, postcodes_path = _split_pair(spec, "postcode universe")

    # Both inputs are checked before bailing out so the caller sees every
    # file-level problem in a single run.
    file_level = [
        *_failures_for_parquet(arcgis_path),
        *_failures_for_parquet(postcodes_path),
    ]
    if file_level:
        return file_level

    try:
        expected = _active_english_arcgis_postcodes(arcgis_path)
        actual = _parquet_postcodes(postcodes_path)
    except Exception as exc:
        # Any load error is reported as a gate failure instead of a traceback.
        return [
            f"{arcgis_path} / {postcodes_path}: postcode universe check failed: {exc}"
        ]

    problems = []

    actual_count = len(actual)
    expected_count = len(expected)
    if actual_count != expected_count:
        problems.append(
            f"{postcodes_path}: postcode count {actual_count:,} != active-English NSPL "
            f"universe {expected_count:,} (from {arcgis_path})"
        )

    # The differences are reported separately from the count mismatch: equal
    # counts can still hide a swap of one postcode for another.
    absent = expected - actual
    unexpected = actual - expected
    if absent:
        problems.append(
            f"{postcodes_path}: {len(absent):,} active English postcodes from "
            f"{arcgis_path} are missing; sample: {_sample(absent)}"
        )
    if unexpected:
        problems.append(
            f"{postcodes_path}: {len(unexpected):,} postcodes are not active English "
            f"postcodes in {arcgis_path}; sample: {_sample(unexpected)}"
        )

    return problems
|
||||
|
||||
|
||||
def _failures_for_postcode_features(path: Path) -> list[str]:
|
||||
"""Validate the postcode feature output: unique Postcode, non-null lat/lon
|
||||
inside the England bbox, ctry25cd == E92000001, and every '% ' column in
|
||||
|
|
@ -565,6 +609,15 @@ def main() -> int:
|
|||
"lat/lon in England, ctry25cd=E92000001, '% ' columns in [0,100]"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--postcode-universe",
|
||||
action="append",
|
||||
default=[],
|
||||
help=(
|
||||
"Require postcode parquet keys to equal the active-English NSPL "
|
||||
"universe: ARCGIS::POSTCODES"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--properties-subset",
|
||||
action="append",
|
||||
|
|
@ -599,6 +652,8 @@ def main() -> int:
|
|||
failures.extend(_failures_for_active_postcode_boundary_match(spec))
|
||||
for path in args.postcode_features:
|
||||
failures.extend(_failures_for_postcode_features(path))
|
||||
for spec in args.postcode_universe:
|
||||
failures.extend(_failures_for_postcode_universe(spec))
|
||||
for spec in args.properties_subset:
|
||||
failures.extend(_failures_for_properties_subset(spec))
|
||||
for path in args.price_index:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue