# perfect-postcode/finder/docker-compose.yml

# Finder scraper + FlareSolverr, both sharing the EXISTING media_gluetun VPN
# container's network namespace. Everything egresses through the VPN, and
# FlareSolverr solves Zoopla's Cloudflare automatically (no VNC needed).
#
# Prerequisites:
#   - The `media_gluetun` container (qmcgaw/gluetun) is running on this host.
#     It is managed by a different compose project; it is referenced here as
#     external via network_mode "container:media_gluetun".
#   - Because these services share gluetun's netns, they reach each other and
#     gluetun on localhost (flaresolverr :8191, gluetun control :8000) and need
#     NO published ports (which is exactly why this avoids the dev-container
#     port-forwarding pain).
#
# Usage:
#   cd finder
#   docker compose up -d --build flaresolverr finder   # start the sidecars
#   docker compose exec finder uv run python main.py --source zoopla --outcodes SW9 --test
#   docker compose exec finder uv run python main.py --source all   # full run
#   docker compose down
#
# NOTE: a manually-started `finder_flaresolverr` container from testing must be
# removed first (`docker rm -f finder_flaresolverr`) to avoid a name clash.
# Both services join the network namespace of the externally managed
# `media_gluetun` container (network_mode "container:..."), so neither one
# publishes ports and each reaches the other on localhost.
services:
  # FlareSolverr: solves the Cloudflare challenge for the scraper, which
  # calls it at FLARESOLVERR_URL (localhost:8191).
  flaresolverr:
    image: ghcr.io/flaresolverr/flaresolverr:latest
    container_name: finder_flaresolverr
    network_mode: "container:media_gluetun"
    environment:
      LOG_LEVEL: info
      TZ: Europe/London
    restart: unless-stopped

  # Finder scraper: an idle, long-lived container built from ./Dockerfile.
  # Scrapes are launched on demand with `docker compose exec finder ...`.
  finder:
    build:
      context: .
      dockerfile: Dockerfile
    image: finder-scraper:latest
    container_name: finder_scraper
    network_mode: "container:media_gluetun"
    # `sleep infinity` would otherwise be PID 1, which gets no default SIGTERM
    # handling: `docker compose down` would wait out the stop grace period
    # before SIGKILL, and orphans left by `docker compose exec` runs would
    # never be reaped. An init process forwards signals and reaps children.
    # NOTE(review): assumes the Dockerfile sets no init-style ENTRYPOINT of
    # its own; confirm. Redundant but harmless if it does.
    init: true
    # Short-form depends_on only orders startup; it does not wait for
    # FlareSolverr to be ready to accept requests.
    depends_on:
      - flaresolverr
    volumes:
      - .:/app/finder  # live-mounted finder source
      - ../property-data:/app/property-data:ro  # ARCGIS postcode data
    working_dir: /app/finder
    environment:
      # Shared netns: sidecars are on localhost, and the netns already tunnels
      # all traffic through the VPN, so no HTTP proxy is used.
      ZOOPLA_FETCHER: flaresolverr
      FLARESOLVERR_URL: http://localhost:8191/v1
      GLUETUN_CONTROL_URL: http://localhost:8000
      GLUETUN_PROXY: ""  # empty => direct (shared netns already tunnels)
      DATA_DIR: /app/finder/data
      ARCGIS_PATH: /app/property-data/arcgis_data.parquet
    # Quoted so YAML 1.1 parsers do not read it as boolean false.
    restart: "no"
    command: ["sleep", "infinity"]  # stays up; run scrapes via `docker compose exec`