Faster scraping
This commit is contained in:
parent
05b8ee06c1
commit
852bb3f3a7
4 changed files with 437 additions and 324 deletions
|
|
@@ -4,8 +4,8 @@ from pathlib import Path
|
|||
ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
|
||||
DATA_DIR = Path("/app/data")
|
||||
PAGE_SIZE = 24
|
||||
-DELAY_BETWEEN_PAGES = 1.0
|
||||
-DELAY_BETWEEN_OUTCODES = 2.0
|
||||
+DELAY_BETWEEN_PAGES = 0.5
|
||||
+DELAY_BETWEEN_OUTCODES = 1.0
|
||||
MAX_RETRIES = 3
|
||||
RETRY_BASE_DELAY = 2.0
|
||||
GRID_CELL_SIZE = 0.01 # degrees for postcode spatial index
|
||||
|
|
@@ -67,6 +67,7 @@ PROPERTY_TYPE_MAP = {
|
|||
"Apartment": "Flats/Maisonettes",
|
||||
"Penthouse": "Flats/Maisonettes",
|
||||
"Ground Flat": "Flats/Maisonettes",
|
||||
"Duplex": "Flats/Maisonettes",
|
||||
"Detached Bungalow": "Detached",
|
||||
"Semi-Detached Bungalow": "Semi-Detached",
|
||||
"Town House": "Terraced",
|
||||
|
|
@@ -75,9 +76,15 @@ PROPERTY_TYPE_MAP = {
|
|||
"Bungalow": "Other",
|
||||
"Cottage": "Other",
|
||||
"Park Home": "Other",
|
||||
"Mobile Home": "Other",
|
||||
"Caravan": "Other",
|
||||
"Lodge": "Other",
|
||||
"Land": "Other",
|
||||
"Farm / Barn": "Other",
|
||||
"Farm House": "Other",
|
||||
"House": "Detached",
|
||||
"House of Multiple Occupation": "Flats/Maisonettes",
|
||||
"House Share": "Other",
|
||||
"Not Specified": "Other",
|
||||
"Chalet": "Other",
|
||||
"Barn Conversion": "Other",
|
||||
|
|
@@ -85,9 +92,20 @@ PROPERTY_TYPE_MAP = {
|
|||
"Character Property": "Other",
|
||||
"Cluster House": "Other",
|
||||
"Retirement Property": "Flats/Maisonettes",
|
||||
"Parking": "Other",
|
||||
"Plot": "Other",
|
||||
"Garages": "Other",
|
||||
"Mews": "Terraced",
|
||||
"Property": "Other",
|
||||
# Lowercase variants (from home.co.uk / Rightmove APIs)
|
||||
"house": "Detached",
|
||||
"bungalow": "Other",
|
||||
"townhouse": "Terraced",
|
||||
"land": "Other",
|
||||
"other": "Other",
|
||||
"not-specified": "Other",
|
||||
"retirement-property": "Flats/Maisonettes",
|
||||
"equestrian-facility": "Other",
|
||||
}
|
||||
|
||||
CHANNELS = [
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue