---
# Finder scraper + FlareSolverr, both sharing the EXISTING media_gluetun VPN
# container's network namespace. Everything egresses through the VPN, and
# FlareSolverr solves Zoopla's Cloudflare automatically (no VNC needed).
#
# Prerequisites:
#   - The `media_gluetun` container (qmcgaw/gluetun) is running on this host.
#     It is managed by a different compose; it is referenced here as external
#     via network_mode "container:media_gluetun".
#   - Because these services share gluetun's netns, they reach each other and
#     gluetun on localhost (flaresolverr :8191, gluetun control :8000) and need
#     NO published ports (which is exactly why this avoids the dev-container
#     port-forwarding pain).
#
# Usage:
#   cd finder
#   docker compose up -d --build flaresolverr finder   # start the sidecars
#   docker compose exec finder uv run python main.py --source zoopla --outcodes SW9 --test
#   docker compose exec finder uv run python main.py --source all   # full run
#   docker compose down
#
# NOTE: a manually-started `finder_flaresolverr` container from testing must be
# removed first (`docker rm -f finder_flaresolverr`) to avoid a name clash.

services:
  # Cloudflare-solving sidecar. It joins gluetun's network namespace, so the
  # scraper reaches it on localhost:8191 and no port is published.
  flaresolverr:
    image: ghcr.io/flaresolverr/flaresolverr:latest
    container_name: finder_flaresolverr
    network_mode: "container:media_gluetun"
    environment:
      LOG_LEVEL: info
      TZ: Europe/London
    restart: unless-stopped

  # Scraper container. It is built from the local Dockerfile and kept idle so
  # that scrapes can be launched on demand with `docker compose exec`.
  finder:
    build:
      context: .
      dockerfile: Dockerfile
    image: finder-scraper:latest
    container_name: finder_scraper
    network_mode: "container:media_gluetun"
    depends_on:
      - flaresolverr
    volumes:
      # Live-mounted finder source.
      - .:/app/finder
      # ARCGIS postcode data, mounted read-only.
      - ../property-data:/app/property-data:ro
    working_dir: /app/finder
    environment:
      # Shared netns: sidecars are on localhost, and the netns already tunnels
      # all traffic through the VPN, so no HTTP proxy is used.
      ZOOPLA_FETCHER: flaresolverr
      FLARESOLVERR_URL: http://localhost:8191/v1
      GLUETUN_CONTROL_URL: http://localhost:8000
      # Empty => direct connection (the shared netns already tunnels).
      GLUETUN_PROXY: ""
      DATA_DIR: /app/finder/data
      ARCGIS_PATH: /app/property-data/arcgis_data.parquet
    # Quoted so YAML reads the restart policy as the string "no", not a boolean.
    restart: "no"
    # Stays up; run scrapes via `docker compose exec`.
    command: ["sleep", "infinity"]