scraping and data
This commit is contained in:
parent
d98819b569
commit
8688b7475e
43 changed files with 4920 additions and 531 deletions
|
|
@ -57,6 +57,16 @@ def parse_args() -> argparse.Namespace:
|
|||
default=DATA_DIR,
|
||||
help=f"Directory for parquet output. Defaults to {DATA_DIR}.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--outcodes",
|
||||
type=str,
|
||||
default=None,
|
||||
help=(
|
||||
"Comma-separated outcodes to scrape (e.g. 'SW9' or 'SW9,E14,BR1') "
|
||||
"instead of the full Greater London set. Must fall within the "
|
||||
"London-ish areas; takes precedence over --test/--limit-outcodes."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--limit-outcodes",
|
||||
type=int,
|
||||
|
|
@ -116,17 +126,32 @@ def main() -> int:
|
|||
from scraper import (
|
||||
build_postcode_coords,
|
||||
build_postcode_index,
|
||||
filter_londonish_outcodes,
|
||||
load_outcodes,
|
||||
run_scrape,
|
||||
)
|
||||
|
||||
outcodes = load_outcodes()
|
||||
if args.test and args.limit_outcodes is None:
|
||||
preferred = [outcode for outcode in TEST_OUTCODES if outcode in set(outcodes)]
|
||||
if preferred:
|
||||
outcodes = preferred
|
||||
if args.limit_outcodes is not None:
|
||||
outcodes = outcodes[: args.limit_outcodes]
|
||||
if args.outcodes is not None:
|
||||
requested = [code.strip().upper() for code in args.outcodes.split(",") if code.strip()]
|
||||
if not requested:
|
||||
raise SystemExit("--outcodes was empty")
|
||||
outcodes = filter_londonish_outcodes(requested)
|
||||
dropped = sorted(set(requested) - set(outcodes))
|
||||
if dropped:
|
||||
log.warning("Ignoring outcodes outside the Greater London-ish areas: %s", ", ".join(dropped))
|
||||
if not outcodes:
|
||||
raise SystemExit(
|
||||
"None of the requested outcodes are within the Greater London-ish areas "
|
||||
f"({', '.join(requested)})."
|
||||
)
|
||||
else:
|
||||
outcodes = load_outcodes()
|
||||
if args.test and args.limit_outcodes is None:
|
||||
preferred = [outcode for outcode in TEST_OUTCODES if outcode in set(outcodes)]
|
||||
if preferred:
|
||||
outcodes = preferred
|
||||
if args.limit_outcodes is not None:
|
||||
outcodes = outcodes[: args.limit_outcodes]
|
||||
|
||||
if not outcodes:
|
||||
raise SystemExit("No Greater London-ish outcodes loaded; nothing to scrape.")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue