all good
This commit is contained in:
parent
47d89f6fad
commit
017902b8e6
82 changed files with 331466 additions and 54841 deletions
|
|
@ -1,8 +1,13 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
|
||||
DATA_DIR = Path("/app/data")
|
||||
FINDER_DIR = Path(__file__).resolve().parent
|
||||
REPO_DIR = FINDER_DIR.parent
|
||||
|
||||
DATA_DIR = Path(os.environ.get("DATA_DIR", str(FINDER_DIR / "data")))
|
||||
ARCGIS_PATH = Path(
|
||||
os.environ.get("ARCGIS_PATH", str(REPO_DIR / "property-data" / "arcgis_data.parquet"))
|
||||
)
|
||||
PAGE_SIZE = 24
|
||||
DELAY_BETWEEN_PAGES = 0.3
|
||||
DELAY_BETWEEN_OUTCODES = 0.5
|
||||
|
|
@ -10,42 +15,6 @@ MAX_RETRIES = 3
|
|||
RETRY_BASE_DELAY = 2.0
|
||||
GRID_CELL_SIZE = 0.01 # degrees for postcode spatial index
|
||||
MAX_BEDROOMS = 20 # sanity cap — values above this are almost certainly parsing errors
|
||||
# Rent sanity bounds (monthly). Rents outside this range are nulled out — they are
|
||||
# almost always total-stay pricing (e.g. "Golf Open 2026" short lets), annual rents
|
||||
# mislabelled as monthly, or data errors.
|
||||
MIN_RENT_MONTHLY = 50 # below £50/month is implausible for any UK property
|
||||
MAX_RENT_MONTHLY = 25_000 # above £25k/month covers ultra-prime London; higher is suspect
|
||||
SEED = 42
|
||||
CHECKPOINT_INTERVAL = int(os.environ.get("CHECKPOINT_INTERVAL", "900")) # seconds
|
||||
|
||||
# Schedule: hour of day (UTC) to auto-run scrape. Set to -1 to disable.
|
||||
SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", "3"))
|
||||
# Whether to run a scrape immediately on startup
|
||||
RUN_ON_STARTUP = os.environ.get("RUN_ON_STARTUP", "").lower() in ("1", "true", "yes")
|
||||
# Enable/disable individual sources
|
||||
SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
SCRAPE_ZOOPLA = os.environ.get("SCRAPE_ZOOPLA", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
|
||||
# URL to trigger server data reload after scrape (e.g. http://server:8001/api/reload)
|
||||
RELOAD_URL = os.environ.get("RELOAD_URL", "")
|
||||
|
||||
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
|
||||
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
|
||||
|
|
@ -55,14 +24,36 @@ RIGHTMOVE_BASE = "https://www.rightmove.co.uk"
|
|||
HOMECOUK_BASE = "https://home.co.uk"
|
||||
HOMECOUK_API_BASE = f"{HOMECOUK_BASE}/api"
|
||||
HOMECOUK_PER_PAGE = 30 # max supported by the API
|
||||
HOMECOUK_CONCURRENCY = int(os.environ.get("HOMECOUK_CONCURRENCY", "4"))
|
||||
|
||||
# OpenRent
|
||||
OPENRENT_BASE = "https://www.openrent.co.uk"
|
||||
|
||||
# Zoopla
|
||||
ZOOPLA_BASE = "https://www.zoopla.co.uk"
|
||||
|
||||
# Greater London-ish postcode areas. This intentionally uses broad area
|
||||
# prefixes so a manual scrape can include central/inner London plus common
|
||||
# outer-London and near-London outcodes without maintaining a long borough list.
|
||||
LONDON_OUTCODE_PREFIXES = {
|
||||
"E",
|
||||
"EC",
|
||||
"N",
|
||||
"NW",
|
||||
"SE",
|
||||
"SW",
|
||||
"W",
|
||||
"WC",
|
||||
"BR",
|
||||
"CR",
|
||||
"DA",
|
||||
"EN",
|
||||
"HA",
|
||||
"IG",
|
||||
"KT",
|
||||
"RM",
|
||||
"SM",
|
||||
"TW",
|
||||
"UB",
|
||||
"WD",
|
||||
}
|
||||
|
||||
PROPERTY_TYPE_MAP = {
|
||||
"Detached": "Detached",
|
||||
"Semi-Detached": "Semi-Detached",
|
||||
|
|
@ -150,5 +141,4 @@ PROPERTY_TYPE_MAP = {
|
|||
|
||||
CHANNELS = [
|
||||
{"channel": "BUY", "transactionType": "BUY", "sortType": "2"},
|
||||
{"channel": "RENT", "transactionType": "LETTING", "sortType": "6"},
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue