import os
from pathlib import Path


def _env_path(name: str, default: Path) -> Path:
    """Return the path stored in environment variable *name*, else *default*.

    An unset OR empty variable falls back to *default*: ``Path("")`` silently
    resolves to the current working directory, which is never a sensible
    location for the data directory or the parquet file.
    """
    return Path(os.environ.get(name) or default)


# --- Filesystem layout -------------------------------------------------------
FINDER_DIR = Path(__file__).resolve().parent  # directory containing this module
REPO_DIR = FINDER_DIR.parent
DATA_DIR = _env_path("DATA_DIR", FINDER_DIR / "data")
ARCGIS_PATH = _env_path(
    "ARCGIS_PATH", REPO_DIR / "property-data" / "arcgis_data.parquet"
)

# --- Pagination, pacing and retries ------------------------------------------
PAGE_SIZE = 24
DELAY_BETWEEN_PAGES = 0.3
DELAY_BETWEEN_OUTCODES = 0.5
MAX_RETRIES = 3
RETRY_BASE_DELAY = 2.0
GRID_CELL_SIZE = 0.01  # degrees for postcode spatial index
MAX_BEDROOMS = 20  # sanity cap — values above this are almost certainly parsing errors

# --- Rightmove ---------------------------------------------------------------
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
RIGHTMOVE_BASE = "https://www.rightmove.co.uk"
# Derived from RIGHTMOVE_BASE so the host is spelled exactly once.
SEARCH_URL = RIGHTMOVE_BASE + "/api/property-search/listing/search"
# Detail page (plain HTTPS GET, no Cloudflare). Its window.__PAGE_MODEL embeds
# propertyData.address.{outcode,incode}, which together form the property's TRUE
# full postcode — the search API only exposes the outcode. {id} is the numeric
# listing id from the search response.
RIGHTMOVE_DETAIL_URL = RIGHTMOVE_BASE + "/properties/{id}"
# The Rightmove search API gives only an outcode-level display address, so the
# true full postcode is recovered from each listing's detail page (see
# finder/rightmove.py::parse_detail_postcode). One extra GET per listing is a
# large increase in request volume over the ~1000-result-per-outcode search, so
# detail fetching is gated and capped per outcode (mirrors ZOOPLA_* below).
# Default ON.
RIGHTMOVE_FETCH_DETAILS = True  # fetch detail pages for true per-listing postcodes
RIGHTMOVE_MAX_DETAILS_PER_OUTCODE = 4000  # max detail-page fetches per outcode

# --- OnTheMarket -------------------------------------------------------------
ONTHEMARKET_BASE = "https://www.onthemarket.com"

# --- Zoopla ------------------------------------------------------------------
ZOOPLA_BASE = "https://www.zoopla.co.uk"
# Zoopla search cards only carry an outcode-level address, so the full postcode
# and precise coordinates are scraped from each listing's detail page. These
# bound that extra work (see finder/zoopla.py and finder/scraper.py).
ZOOPLA_FETCH_DETAILS = True  # fetch detail pages for precise per-listing postcodes
ZOOPLA_MAX_DETAILS_PER_OUTCODE = 4000  # max detail-page fetches per outcode
# NOTE(review): 1_500_000 ms is 25 minutes per navigation — confirm this is
# intentional and not a slip for a much smaller value.
ZOOPLA_DETAIL_GOTO_TIMEOUT_MS = 1_500_000  # per detail-page navigation timeout
# Fraction of a single outcode's wall-clock budget (ZOOPLA_OUTCODE_TIMEOUT_SECONDS)
# spent fetching details; the remainder is reserved for search pagination so
# detail fetches can never trip the timeout and discard collected listings.
ZOOPLA_DETAIL_BUDGET_FRACTION = 0.6

# Gluetun VPN. Network endpoints are env-overridable because they are
# deployment-specific: when finder runs in a SEPARATE container they use the
# `gluetun` hostname (defaults below); when finder SHARES gluetun's network
# namespace (docker-compose.yml, network_mode container:media_gluetun) they
# become localhost and GLUETUN_PROXY is empty (the shared netns already tunnels
# all traffic, so no HTTP proxy is needed).
# GLUETUN_PROXY="" (empty) => direct connection (no proxy); used in shared-netns.
GLUETUN_PROXY = os.environ.get("GLUETUN_PROXY", "http://gluetun:8888") or None
GLUETUN_CONTROL_URL = os.environ.get("GLUETUN_CONTROL_URL", "http://gluetun:8000")
# The API key is a credential and is deployment-specific like the endpoints
# above, so it is read from the environment first.
# SECURITY: the literal fallback is kept only so existing deployments keep
# working; it is committed to source control and should be rotated and then
# supplied exclusively via the GLUETUN_API_KEY environment variable.
GLUETUN_API_KEY = os.environ.get(
    "GLUETUN_API_KEY", "My8AbvnKhfyFdRhpTVfoTfa5DkAMmg8K"
)
# Egress-IP rotations to try per Cloudflare challenge. Keep at 0 for Zoopla:
# rotating among Gluetun's datacenter IPs doesn't clear Cloudflare and would
# rotate away from the IP a cleared Cloudflare session was bound to, voiding it.
# Raise only with residential IPs where rotation helps.
GLUETUN_MAX_ROTATIONS = 0  # max egress-IP rotations per Cloudflare challenge

# Zoopla fetcher: "flaresolverr" (default) solves Cloudflare via the FlareSolverr
# sidecar (docker-compose.yml) and needs no display/VNC — verified to return the
# RSC flight stream with postcode + coordinates; "camoufox" drives a local
# anti-fingerprint browser (needs an interactive solve on datacenter IPs).
ZOOPLA_FETCHER = os.getenv("ZOOPLA_FETCHER", "flaresolverr")
FLARESOLVERR_URL = os.getenv("FLARESOLVERR_URL", "http://gluetun:8191/v1")
FLARESOLVERR_MAX_TIMEOUT_MS = 120_000  # per-request solve budget; first solve is slow

# Postcode-area prefixes treated as "Greater London-ish". Broad area prefixes
# are used on purpose: a manual scrape then covers central/inner London plus the
# usual outer-London and near-London outcodes without a hand-kept borough list.
LONDON_OUTCODE_PREFIXES = set(
    "E EC N NW SE SW W WC BR CR DA EN HA IG KT RM SM TW UB WD".split()
)

# Canonical categories that every provider label in PROPERTY_TYPE_MAP maps to.
_DETACHED = "Detached"
_SEMI_DETACHED = "Semi-Detached"
_TERRACED = "Terraced"
_FLAT = "Flats/Maisonettes"
_OTHER = "Other"

# Provider property-type label -> canonical category. Keys are matched exactly
# (case- and punctuation-sensitive), which is why several spellings of the same
# label are listed.
PROPERTY_TYPE_MAP = {
    "Detached": _DETACHED,
    "Semi-Detached": _SEMI_DETACHED,
    "Terraced": _TERRACED,
    "End of Terrace": _TERRACED,
    "Mid Terrace": _TERRACED,
    "Flat": _FLAT,
    "Maisonette": _FLAT,
    "Studio": _FLAT,
    "Apartment": _FLAT,
    "Penthouse": _FLAT,
    "Ground Flat": _FLAT,
    "Duplex": _FLAT,
    "Detached Bungalow": _DETACHED,
    "Semi-Detached Bungalow": _SEMI_DETACHED,
    "Town House": _TERRACED,
    "Link Detached": _DETACHED,
    "Link Detached House": _DETACHED,
    "Bungalow": _OTHER,
    "Cottage": _OTHER,
    "Park Home": _OTHER,
    "Mobile Home": _OTHER,
    "Caravan": _OTHER,
    "Lodge": _OTHER,
    "Land": _OTHER,
    "Farm / Barn": _OTHER,
    "Farm House": _OTHER,
    "House": _DETACHED,
    "House of Multiple Occupation": _OTHER,
    "House Share": _OTHER,
    "Not Specified": _OTHER,
    "Chalet": _OTHER,
    "Barn Conversion": _OTHER,
    "Coach House": _OTHER,
    "Character Property": _OTHER,
    "Cluster House": _OTHER,
    "Retirement Property": _OTHER,
    "Parking": _OTHER,
    "Plot": _OTHER,
    "Garages": _OTHER,
    "Mews": _TERRACED,
    "Property": _OTHER,
    "Flat Share": _OTHER,
    "Block of Apartments": _OTHER,
    "Private Halls": _OTHER,
    "Terraced Bungalow": _TERRACED,
    "Equestrian Facility": _OTHER,
    "Ground Maisonette": _FLAT,
    "Country House": _DETACHED,
    "Village House": _DETACHED,
    "Farm Land": _OTHER,
    "House Boat": _OTHER,
    "Barn": _OTHER,
    "Serviced Apartments": _OTHER,
    # Space-separated variants from legacy provider normalization.
    "Semi Detached": _SEMI_DETACHED,
    "Semi Detached Bungalow": _SEMI_DETACHED,
    "End Of Terrace": _TERRACED,
    "End Terrace": _TERRACED,
    "Block Of Apartments": _OTHER,
    # Lowercase variants from listing APIs.
    "house": _DETACHED,
    "bungalow": _OTHER,
    "townhouse": _TERRACED,
    "land": _OTHER,
    "other": _OTHER,
    "not-specified": _OTHER,
    "retirement-property": _OTHER,
    "equestrian-facility": _OTHER,
    "flat": _FLAT,
    "detached": _DETACHED,
    "semi-detached": _SEMI_DETACHED,
    "terraced": _TERRACED,
    "maisonette": _FLAT,
    "apartment": _FLAT,
    "studio": _FLAT,
    "penthouse": _FLAT,
    "cottage": _OTHER,
    "chalet": _OTHER,
    # NOTE(review): "farm_house" maps to Detached while "Farm House" above maps
    # to Other — confirm the difference is intended.
    "farm_house": _DETACHED,
    "country house": _DETACHED,
    "village house": _DETACHED,
}

# Search channels to scrape; each entry holds the channel, transactionType and
# sortType values for one channel. Only the BUY channel is configured.
CHANNELS = [
    dict(channel="BUY", transactionType="BUY", sortType="2"),
]