Faster scraping

This commit is contained in:
Andras Schmelczer 2026-03-20 07:52:22 +00:00
parent 05b8ee06c1
commit 852bb3f3a7
4 changed files with 437 additions and 324 deletions

View file

@@ -4,8 +4,8 @@ from pathlib import Path
ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
DATA_DIR = Path("/app/data")
PAGE_SIZE = 24
-DELAY_BETWEEN_PAGES = 1.0
-DELAY_BETWEEN_OUTCODES = 2.0
+DELAY_BETWEEN_PAGES = 0.5
+DELAY_BETWEEN_OUTCODES = 1.0
MAX_RETRIES = 3
RETRY_BASE_DELAY = 2.0
GRID_CELL_SIZE = 0.01 # degrees for postcode spatial index
@@ -67,6 +67,7 @@ PROPERTY_TYPE_MAP = {
"Apartment": "Flats/Maisonettes",
"Penthouse": "Flats/Maisonettes",
"Ground Flat": "Flats/Maisonettes",
"Duplex": "Flats/Maisonettes",
"Detached Bungalow": "Detached",
"Semi-Detached Bungalow": "Semi-Detached",
"Town House": "Terraced",
@@ -75,9 +76,15 @@ PROPERTY_TYPE_MAP = {
"Bungalow": "Other",
"Cottage": "Other",
"Park Home": "Other",
"Mobile Home": "Other",
"Caravan": "Other",
"Lodge": "Other",
"Land": "Other",
"Farm / Barn": "Other",
"Farm House": "Other",
"House": "Detached",
"House of Multiple Occupation": "Flats/Maisonettes",
"House Share": "Other",
"Not Specified": "Other",
"Chalet": "Other",
"Barn Conversion": "Other",
@@ -85,9 +92,20 @@ PROPERTY_TYPE_MAP = {
"Character Property": "Other",
"Cluster House": "Other",
"Retirement Property": "Flats/Maisonettes",
"Parking": "Other",
"Plot": "Other",
"Garages": "Other",
"Mews": "Terraced",
"Property": "Other",
# Lowercase variants (from home.co.uk / Rightmove APIs)
"house": "Detached",
"bungalow": "Other",
"townhouse": "Terraced",
"land": "Other",
"other": "Other",
"not-specified": "Other",
"retirement-property": "Flats/Maisonettes",
"equestrian-facility": "Other",
}
CHANNELS = [