111 lines
3.2 KiB
Python
111 lines
3.2 KiB
Python
import logging
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from flask import Flask, Response, jsonify, send_from_directory
|
|
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
|
|
|
from constants import DATA_DIR
|
|
from rightmove import outcode_cache
|
|
from scraper import (
|
|
_sync_gauges,
|
|
build_postcode_index,
|
|
load_outcodes,
|
|
run_scrape,
|
|
status,
|
|
status_lock,
|
|
)
|
|
|
|
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------

# Directory that receives the log file; created up front so the file handler
# configured below can open its target.
LOG_DIR = Path("/app/data")
LOG_DIR.mkdir(parents=True, exist_ok=True)

# Root logger: DEBUG and above, sent both to a stream handler and to
# rightmove.log inside LOG_DIR.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(LOG_DIR / "rightmove.log"),
    ],
)

# Logger used by this module.
log = logging.getLogger("rightmove")
log.setLevel(logging.DEBUG)

# Raise the threshold of the HTTP client loggers to WARNING.
for _noisy_logger in ("httpx", "httpcore"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
|
|
|
|
# ---------------------------------------------------------------------------
# Startup: load data
# ---------------------------------------------------------------------------

# Both objects are built once, when the module is imported, and are later
# passed as the arguments of run_scrape by the /run endpoint.
log.info("Loading arcgis data...")
OUTCODES = load_outcodes()
PC_INDEX = build_postcode_index()
log.info("Ready — %d outcodes, postcode index built", len(OUTCODES))
|
|
|
|
# ---------------------------------------------------------------------------
# Flask app
# ---------------------------------------------------------------------------

# Application object that the @app.route decorators in this module register
# their view functions on.
app = Flask(__name__)
|
|
|
|
|
|
@app.route("/run", methods=["POST"])
def trigger_run():
    """Start a background scrape unless one is already in progress.

    Returns:
        A ``(json, http_status)`` pair: 409 with an ``error`` message when
        ``status.state`` is already ``"running"``, otherwise 200 with a
        ``message`` once the worker thread has been started.

    Raises:
        RuntimeError: Re-raised from ``Thread.start()`` when the worker
            thread cannot be started. ``status.state`` is put back to its
            previous value before the exception propagates.
    """
    with status_lock:
        if status.state == "running":
            return jsonify({"error": "Scrape already running"}), 409
        # Claim the run while still holding the lock so that two concurrent
        # POSTs cannot both get past the check above.
        previous_state = status.state
        status.state = "running"

    worker = threading.Thread(
        target=run_scrape,
        args=(OUTCODES, PC_INDEX),
        daemon=True,
    )
    try:
        worker.start()
    except RuntimeError:
        # No scrape is actually running, so undo the claim; left as
        # "running", later POSTs would keep being answered with 409.
        with status_lock:
            status.state = previous_state
        log.exception("Could not start scrape thread")
        raise
    return jsonify({"message": "Scrape started"}), 200
|
|
|
|
|
|
@app.route("/status")
def get_status():
    """Return a JSON snapshot of the shared scrape status.

    ``elapsed_seconds`` runs from ``status.started_at`` to
    ``status.finished_at`` when that is set, otherwise to the current time;
    it is 0.0 while ``started_at`` is unset. Only the last 20 entries of
    ``status.errors`` are included.
    """
    with status_lock:
        if not status.started_at:
            elapsed = 0.0
        else:
            # A run that has not finished yet is measured up to "now".
            stop = status.finished_at or time.time()
            elapsed = stop - status.started_at

        snapshot = {
            "state": status.state,
            "channel": status.channel,
            "outcode": status.outcode,
            "outcodes_done": status.outcodes_done,
            "outcodes_total": status.outcodes_total,
            "properties_buy": status.properties_buy,
            "properties_rent": status.properties_rent,
            "errors": status.errors[-20:],  # last 20 errors
            "elapsed_seconds": round(elapsed, 1),
        }
        return jsonify(snapshot)
|
|
|
|
|
|
@app.route("/debug")
def get_debug():
    """Return the outcode cache size and its first 20 entries as JSON."""
    cache_size = len(outcode_cache)
    # Copy the items into a list first, then keep the first 20 in
    # iteration order.
    first_entries = list(outcode_cache.items())[:20]
    return jsonify({
        "outcode_cache_size": cache_size,
        "outcode_cache_sample": dict(first_entries),
    })
|
|
|
|
|
|
@app.route("/metrics")
def metrics():
    """Serve the Prometheus metrics exposition.

    ``_sync_gauges()`` is called under ``status_lock`` before the registry
    is rendered with ``generate_latest()``.
    """
    with status_lock:
        _sync_gauges()
    exposition = generate_latest()
    return Response(exposition, mimetype=CONTENT_TYPE_LATEST)
|
|
|
|
|
|
@app.route("/data/<filename>")
def serve_data(filename):
    """Serve a ``.parquet`` file from ``DATA_DIR``.

    Any filename that does not end in ``.parquet`` is answered with a JSON
    error and HTTP 400.
    """
    if filename.endswith(".parquet"):
        return send_from_directory(DATA_DIR, filename)
    return jsonify({"error": "Only parquet files served"}), 400
|
|
|
|
|
|
if __name__ == "__main__":
    # Direct execution: run the Flask server on all interfaces, port 1234,
    # with debug mode off.
    app.run(host="0.0.0.0", port=1234, debug=False)
|