# Finder scraper + FlareSolverr, both sharing the EXISTING media_gluetun VPN
# container's network namespace. Everything egresses through the VPN, and
# FlareSolverr solves Zoopla's Cloudflare challenge automatically (no VNC
# needed).
#
# Prerequisites:
#   - The `media_gluetun` container (qmcgaw/gluetun) is running on this host.
#     It is managed by a different compose project, so it is referenced here
#     as an external container via network_mode "container:media_gluetun".
#   - Because these services share gluetun's netns, they reach each other and
#     gluetun on localhost (flaresolverr :8191, gluetun control :8000) and need
#     NO published ports (which is exactly why this avoids the dev-container
#     port-forwarding pain).
#
# Usage:
#   cd finder
#   docker compose up -d --build flaresolverr finder   # start the sidecars
#   docker compose exec finder uv run python main.py --source zoopla --outcodes SW9 --test
#   docker compose exec finder uv run python main.py --source all   # full run
#   docker compose down
#
# NOTE: a manually-started `finder_flaresolverr` container from testing must be
# removed first (`docker rm -f finder_flaresolverr`) to avoid a name clash.

services:
  # Cloudflare-challenge solver. It joins the media_gluetun container's network
  # namespace, so the scraper below addresses it as localhost:8191 and no ports
  # are published.
  flaresolverr:
    image: ghcr.io/flaresolverr/flaresolverr:latest
    container_name: finder_flaresolverr
    network_mode: "container:media_gluetun"
    environment:
      LOG_LEVEL: info
      TZ: Europe/London
    restart: unless-stopped

  # Scraper container. It idles on `sleep infinity`; individual scrapes are
  # launched on demand with `docker compose exec finder ...`.
  finder:
    build:
      context: .
      dockerfile: Dockerfile
    image: finder-scraper:latest
    container_name: finder_scraper
    network_mode: "container:media_gluetun"
    depends_on:
      - flaresolverr
    volumes:
      - .:/app/finder  # live-mounted finder source
      - ../property-data:/app/property-data:ro  # ARCGIS postcode data
    working_dir: /app/finder
    environment:
      # Shared netns: sidecars are on localhost, and the netns already tunnels
      # all traffic through the VPN, so no HTTP proxy is used.
      ZOOPLA_FETCHER: flaresolverr
      FLARESOLVERR_URL: http://localhost:8191/v1
      GLUETUN_CONTROL_URL: http://localhost:8000
      GLUETUN_PROXY: ""  # empty => direct (shared netns already tunnels)
      DATA_DIR: /app/finder/data
      ARCGIS_PATH: /app/property-data/arcgis_data.parquet
    # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1
    # parsers, while Compose expects the string "no" here.
    restart: "no"
    # `sleep` running as PID 1 ignores SIGTERM, so without an init process
    # `docker compose down` has to wait out the stop timeout before the
    # container is killed. The init process forwards signals to `sleep`.
    init: true
    command: ["sleep", "infinity"]  # stays up; run scrapes via `docker compose exec`