Fmt
This commit is contained in:
parent
479ef92236
commit
c38d654ac7
44 changed files with 2526 additions and 701 deletions
|
|
@ -813,8 +813,14 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Build area lookup from both sets\n",
|
"# Build area lookup from both sets\n",
|
||||||
"areas_before = {f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"] for f in no_green[\"features\"]}\n",
|
"areas_before = {\n",
|
||||||
"areas_after = {f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"] for f in with_green[\"features\"]}\n",
|
" f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"]\n",
|
||||||
|
" for f in no_green[\"features\"]\n",
|
||||||
|
"}\n",
|
||||||
|
"areas_after = {\n",
|
||||||
|
" f[\"properties\"][\"postcode\"]: f[\"properties\"][\"area_sqm\"]\n",
|
||||||
|
" for f in with_green[\"features\"]\n",
|
||||||
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Compute percentage removed\n",
|
"# Compute percentage removed\n",
|
||||||
"diffs = []\n",
|
"diffs = []\n",
|
||||||
|
|
@ -1161,16 +1167,23 @@
|
||||||
"\n",
|
"\n",
|
||||||
"colormap = cm.LinearColormap(\n",
|
"colormap = cm.LinearColormap(\n",
|
||||||
" colors=[\"#ffffcc\", \"#fd8d3c\", \"#e31a1c\", \"#800026\"],\n",
|
" colors=[\"#ffffcc\", \"#fd8d3c\", \"#e31a1c\", \"#800026\"],\n",
|
||||||
" vmin=0, vmax=min(max_pct, 90),\n",
|
" vmin=0,\n",
|
||||||
|
" vmax=min(max_pct, 90),\n",
|
||||||
" caption=\"% area removed by greenspace\",\n",
|
" caption=\"% area removed by greenspace\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"# Show original boundaries, colored by how much was removed\n",
|
"# Show original boundaries, colored by how much was removed\n",
|
||||||
"def style_by_removal(feature):\n",
|
"def style_by_removal(feature):\n",
|
||||||
" pc = feature[\"properties\"][\"postcode\"]\n",
|
" pc = feature[\"properties\"][\"postcode\"]\n",
|
||||||
" pct = diff_lookup.get(pc, 0)\n",
|
" pct = diff_lookup.get(pc, 0)\n",
|
||||||
" if pct <= 1:\n",
|
" if pct <= 1:\n",
|
||||||
" return {\"fillColor\": \"#cccccc\", \"color\": \"#999\", \"weight\": 0.5, \"fillOpacity\": 0.15}\n",
|
" return {\n",
|
||||||
|
" \"fillColor\": \"#cccccc\",\n",
|
||||||
|
" \"color\": \"#999\",\n",
|
||||||
|
" \"weight\": 0.5,\n",
|
||||||
|
" \"fillOpacity\": 0.15,\n",
|
||||||
|
" }\n",
|
||||||
" return {\n",
|
" return {\n",
|
||||||
" \"fillColor\": colormap(min(pct, 90)),\n",
|
" \"fillColor\": colormap(min(pct, 90)),\n",
|
||||||
" \"color\": \"white\",\n",
|
" \"color\": \"white\",\n",
|
||||||
|
|
@ -1178,6 +1191,7 @@
|
||||||
" \"fillOpacity\": 0.6,\n",
|
" \"fillOpacity\": 0.6,\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"folium.GeoJson(\n",
|
"folium.GeoJson(\n",
|
||||||
" no_green,\n",
|
" no_green,\n",
|
||||||
" name=\"Greenspace removal %\",\n",
|
" name=\"Greenspace removal %\",\n",
|
||||||
|
|
|
||||||
|
|
@ -54,25 +54,32 @@
|
||||||
" ape = np.abs(p - a) / a\n",
|
" ape = np.abs(p - a) / a\n",
|
||||||
" err = p - a\n",
|
" err = p - a\n",
|
||||||
" return {\n",
|
" return {\n",
|
||||||
" \"MdAPE (%)\": f\"{np.median(ape)*100:.1f}\",\n",
|
" \"MdAPE (%)\": f\"{np.median(ape) * 100:.1f}\",\n",
|
||||||
" \"% within 10%\": f\"{np.mean(ape <= 0.10)*100:.1f}\",\n",
|
" \"% within 10%\": f\"{np.mean(ape <= 0.10) * 100:.1f}\",\n",
|
||||||
" \"% within 20%\": f\"{np.mean(ape <= 0.20)*100:.1f}\",\n",
|
" \"% within 20%\": f\"{np.mean(ape <= 0.20) * 100:.1f}\",\n",
|
||||||
" \"% within 30%\": f\"{np.mean(ape <= 0.30)*100:.1f}\",\n",
|
" \"% within 30%\": f\"{np.mean(ape <= 0.30) * 100:.1f}\",\n",
|
||||||
" \"MAE (\\u00a3)\": f\"{np.mean(np.abs(err)):,.0f}\",\n",
|
" \"MAE (\\u00a3)\": f\"{np.mean(np.abs(err)):,.0f}\",\n",
|
||||||
" \"Mean signed error (\\u00a3)\": f\"{np.mean(err):+,.0f}\",\n",
|
" \"Mean signed error (\\u00a3)\": f\"{np.mean(err):+,.0f}\",\n",
|
||||||
" \"n\": f\"{len(a):,}\",\n",
|
" \"n\": f\"{len(a):,}\",\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"actual = backtest_df[\"actual_price\"].to_numpy().astype(np.float64)\n",
|
"actual = backtest_df[\"actual_price\"].to_numpy().astype(np.float64)\n",
|
||||||
"metrics = {\n",
|
"metrics = {\n",
|
||||||
" \"Naive\": compute_metrics(actual, backtest_df[\"input_price\"].to_numpy().astype(np.float64)),\n",
|
" \"Naive\": compute_metrics(\n",
|
||||||
" \"Index\": compute_metrics(actual, backtest_df[\"predicted\"].to_numpy().astype(np.float64)),\n",
|
" actual, backtest_df[\"input_price\"].to_numpy().astype(np.float64)\n",
|
||||||
|
" ),\n",
|
||||||
|
" \"Index\": compute_metrics(\n",
|
||||||
|
" actual, backtest_df[\"predicted\"].to_numpy().astype(np.float64)\n",
|
||||||
|
" ),\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"metrics_table = pl.DataFrame([\n",
|
"metrics_table = pl.DataFrame(\n",
|
||||||
" {\"Metric\": k, **{stage: v[k] for stage, v in metrics.items()}}\n",
|
" [\n",
|
||||||
" for k in list(metrics[\"Naive\"].keys())\n",
|
" {\"Metric\": k, **{stage: v[k] for stage, v in metrics.items()}}\n",
|
||||||
"])\n",
|
" for k in list(metrics[\"Naive\"].keys())\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
"metrics_table"
|
"metrics_table"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -91,8 +98,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# National index (average across all sectors weighted by n_pairs)\n",
|
"# National index (average across all sectors weighted by n_pairs)\n",
|
||||||
"national = (\n",
|
"national = (\n",
|
||||||
" index_df\n",
|
" index_df.group_by(\"year\")\n",
|
||||||
" .group_by(\"year\")\n",
|
|
||||||
" .agg(\n",
|
" .agg(\n",
|
||||||
" (pl.col(\"log_index\") * pl.col(\"n_pairs\")).sum() / pl.col(\"n_pairs\").sum(),\n",
|
" (pl.col(\"log_index\") * pl.col(\"n_pairs\")).sum() / pl.col(\"n_pairs\").sum(),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
|
|
@ -107,14 +113,23 @@
|
||||||
"\n",
|
"\n",
|
||||||
"# If not enough, pick some with high/low n_pairs\n",
|
"# If not enough, pick some with high/low n_pairs\n",
|
||||||
"if len(sample_sectors) < 3:\n",
|
"if len(sample_sectors) < 3:\n",
|
||||||
" sector_counts = index_df.group_by(\"sector\").agg(pl.col(\"n_pairs\").first()).sort(\"n_pairs\", descending=True)\n",
|
" sector_counts = (\n",
|
||||||
|
" index_df.group_by(\"sector\")\n",
|
||||||
|
" .agg(pl.col(\"n_pairs\").first())\n",
|
||||||
|
" .sort(\"n_pairs\", descending=True)\n",
|
||||||
|
" )\n",
|
||||||
" top = sector_counts.head(2)[\"sector\"].to_list()\n",
|
" top = sector_counts.head(2)[\"sector\"].to_list()\n",
|
||||||
" bottom = sector_counts.filter(pl.col(\"n_pairs\") > 0).tail(2)[\"sector\"].to_list()\n",
|
" bottom = sector_counts.filter(pl.col(\"n_pairs\") > 0).tail(2)[\"sector\"].to_list()\n",
|
||||||
" sample_sectors = list(set(sample_sectors + top + bottom))[:5]\n",
|
" sample_sectors = list(set(sample_sectors + top + bottom))[:5]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"samples = index_df.filter(pl.col(\"sector\").is_in(sample_sectors))\n",
|
"samples = index_df.filter(pl.col(\"sector\").is_in(sample_sectors))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"combined = pl.concat([national.select(\"sector\", \"year\", \"log_index\"), samples.select(\"sector\", \"year\", \"log_index\")])\n",
|
"combined = pl.concat(\n",
|
||||||
|
" [\n",
|
||||||
|
" national.select(\"sector\", \"year\", \"log_index\"),\n",
|
||||||
|
" samples.select(\"sector\", \"year\", \"log_index\"),\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Normalize: index = 100 at base year (earliest available)\n",
|
"# Normalize: index = 100 at base year (earliest available)\n",
|
||||||
"combined = combined.with_columns(\n",
|
"combined = combined.with_columns(\n",
|
||||||
|
|
@ -122,7 +137,10 @@
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.line(\n",
|
"fig = px.line(\n",
|
||||||
" combined.to_pandas(), x=\"year\", y=\"index_100\", color=\"sector\",\n",
|
" combined.to_pandas(),\n",
|
||||||
|
" x=\"year\",\n",
|
||||||
|
" y=\"index_100\",\n",
|
||||||
|
" color=\"sector\",\n",
|
||||||
" title=\"Repeat-Sales Price Index (base year = 100)\",\n",
|
" title=\"Repeat-Sales Price Index (base year = 100)\",\n",
|
||||||
" labels={\"index_100\": \"Index (base=100)\", \"year\": \"Year\"},\n",
|
" labels={\"index_100\": \"Index (base=100)\", \"year\": \"Year\"},\n",
|
||||||
")\n",
|
")\n",
|
||||||
|
|
@ -155,8 +173,10 @@
|
||||||
"\n",
|
"\n",
|
||||||
"fig.update_layout(\n",
|
"fig.update_layout(\n",
|
||||||
" title=\"Absolute Percentage Error Distribution\",\n",
|
" title=\"Absolute Percentage Error Distribution\",\n",
|
||||||
" xaxis_title=\"APE (%)\", yaxis_title=\"Count\",\n",
|
" xaxis_title=\"APE (%)\",\n",
|
||||||
" barmode=\"overlay\", height=500,\n",
|
" yaxis_title=\"Count\",\n",
|
||||||
|
" barmode=\"overlay\",\n",
|
||||||
|
" height=500,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -183,17 +203,27 @@
|
||||||
"pred = sample[\"predicted\"].to_numpy().astype(np.float64)\n",
|
"pred = sample[\"predicted\"].to_numpy().astype(np.float64)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = go.Figure()\n",
|
"fig = go.Figure()\n",
|
||||||
"fig.add_trace(go.Scattergl(\n",
|
"fig.add_trace(\n",
|
||||||
" x=actual_sample, y=pred, mode=\"markers\",\n",
|
" go.Scattergl(\n",
|
||||||
" marker=dict(size=2, opacity=0.3), name=\"Index\",\n",
|
" x=actual_sample,\n",
|
||||||
"))\n",
|
" y=pred,\n",
|
||||||
|
" mode=\"markers\",\n",
|
||||||
|
" marker=dict(size=2, opacity=0.3),\n",
|
||||||
|
" name=\"Index\",\n",
|
||||||
|
" )\n",
|
||||||
|
")\n",
|
||||||
"# 45-degree reference line\n",
|
"# 45-degree reference line\n",
|
||||||
"min_val = max(10_000, min(actual_sample.min(), np.nanmin(pred)))\n",
|
"min_val = max(10_000, min(actual_sample.min(), np.nanmin(pred)))\n",
|
||||||
"max_val = min(5_000_000, max(actual_sample.max(), np.nanmax(pred)))\n",
|
"max_val = min(5_000_000, max(actual_sample.max(), np.nanmax(pred)))\n",
|
||||||
"fig.add_trace(go.Scatter(\n",
|
"fig.add_trace(\n",
|
||||||
" x=[min_val, max_val], y=[min_val, max_val],\n",
|
" go.Scatter(\n",
|
||||||
" mode=\"lines\", line=dict(color=\"red\", dash=\"dash\"), showlegend=False,\n",
|
" x=[min_val, max_val],\n",
|
||||||
"))\n",
|
" y=[min_val, max_val],\n",
|
||||||
|
" mode=\"lines\",\n",
|
||||||
|
" line=dict(color=\"red\", dash=\"dash\"),\n",
|
||||||
|
" showlegend=False,\n",
|
||||||
|
" )\n",
|
||||||
|
")\n",
|
||||||
"fig.update_xaxes(type=\"log\", title_text=\"Actual (\\u00a3)\")\n",
|
"fig.update_xaxes(type=\"log\", title_text=\"Actual (\\u00a3)\")\n",
|
||||||
"fig.update_yaxes(type=\"log\", title_text=\"Predicted (\\u00a3)\")\n",
|
"fig.update_yaxes(type=\"log\", title_text=\"Predicted (\\u00a3)\")\n",
|
||||||
"fig.update_layout(title=\"Predicted vs Actual Price (log scale, 10K sample)\", height=500)\n",
|
"fig.update_layout(title=\"Predicted vs Actual Price (log scale, 10K sample)\", height=500)\n",
|
||||||
|
|
@ -234,12 +264,22 @@
|
||||||
" for name, arr in [(\"Naive\", naive), (\"Index\", pred)]:\n",
|
" for name, arr in [(\"Naive\", naive), (\"Index\", pred)]:\n",
|
||||||
" ape = np.abs(arr[mask] - actual[mask]) / actual[mask]\n",
|
" ape = np.abs(arr[mask] - actual[mask]) / actual[mask]\n",
|
||||||
" valid = np.isfinite(ape)\n",
|
" valid = np.isfinite(ape)\n",
|
||||||
" rows.append({\"Price Band\": label, \"Method\": name, \"MdAPE (%)\": float(np.median(ape[valid]) * 100)})\n",
|
" rows.append(\n",
|
||||||
|
" {\n",
|
||||||
|
" \"Price Band\": label,\n",
|
||||||
|
" \"Method\": name,\n",
|
||||||
|
" \"MdAPE (%)\": float(np.median(ape[valid]) * 100),\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"band_df = pl.DataFrame(rows)\n",
|
"band_df = pl.DataFrame(rows)\n",
|
||||||
"fig = px.bar(\n",
|
"fig = px.bar(\n",
|
||||||
" band_df.to_pandas(), x=\"Price Band\", y=\"MdAPE (%)\", color=\"Method\",\n",
|
" band_df.to_pandas(),\n",
|
||||||
" barmode=\"group\", title=\"MdAPE by Price Band\",\n",
|
" x=\"Price Band\",\n",
|
||||||
|
" y=\"MdAPE (%)\",\n",
|
||||||
|
" color=\"Method\",\n",
|
||||||
|
" barmode=\"group\",\n",
|
||||||
|
" title=\"MdAPE by Price Band\",\n",
|
||||||
" category_orders={\"Price Band\": [b[2] for b in bands]},\n",
|
" category_orders={\"Price Band\": [b[2] for b in bands]},\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_layout(height=450)\n",
|
"fig.update_layout(height=450)\n",
|
||||||
|
|
@ -264,7 +304,9 @@
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Top 20 areas by volume\n",
|
"# Top 20 areas by volume\n",
|
||||||
"top_areas = bt.group_by(\"area\").len().sort(\"len\", descending=True).head(20)[\"area\"].to_list()\n",
|
"top_areas = (\n",
|
||||||
|
" bt.group_by(\"area\").len().sort(\"len\", descending=True).head(20)[\"area\"].to_list()\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"actual_np = bt[\"actual_price\"].to_numpy().astype(np.float64)\n",
|
"actual_np = bt[\"actual_price\"].to_numpy().astype(np.float64)\n",
|
||||||
"pred_np = bt[\"predicted\"].to_numpy().astype(np.float64)\n",
|
"pred_np = bt[\"predicted\"].to_numpy().astype(np.float64)\n",
|
||||||
|
|
@ -279,12 +321,18 @@
|
||||||
" p = arr[mask]\n",
|
" p = arr[mask]\n",
|
||||||
" valid = np.isfinite(p) & (a > 0)\n",
|
" valid = np.isfinite(p) & (a > 0)\n",
|
||||||
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
|
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
|
||||||
" rows.append({\"Area\": area, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)})\n",
|
" rows.append(\n",
|
||||||
|
" {\"Area\": area, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)}\n",
|
||||||
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"area_df = pl.DataFrame(rows)\n",
|
"area_df = pl.DataFrame(rows)\n",
|
||||||
"fig = px.bar(\n",
|
"fig = px.bar(\n",
|
||||||
" area_df.to_pandas(), x=\"Area\", y=\"MdAPE (%)\", color=\"Method\",\n",
|
" area_df.to_pandas(),\n",
|
||||||
" barmode=\"group\", title=\"MdAPE by Postcode Area (Top 20 by Volume)\",\n",
|
" x=\"Area\",\n",
|
||||||
|
" y=\"MdAPE (%)\",\n",
|
||||||
|
" color=\"Method\",\n",
|
||||||
|
" barmode=\"group\",\n",
|
||||||
|
" title=\"MdAPE by Postcode Area (Top 20 by Volume)\",\n",
|
||||||
" category_orders={\"Area\": top_areas},\n",
|
" category_orders={\"Area\": top_areas},\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_layout(height=500)\n",
|
"fig.update_layout(height=500)\n",
|
||||||
|
|
@ -324,11 +372,20 @@
|
||||||
" p = arr[mask]\n",
|
" p = arr[mask]\n",
|
||||||
" valid = np.isfinite(p) & (a > 0)\n",
|
" valid = np.isfinite(p) & (a > 0)\n",
|
||||||
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
|
" ape = np.abs(p[valid] - a[valid]) / a[valid]\n",
|
||||||
" rows.append({\"Gap (years)\": gap, \"Method\": name, \"MdAPE (%)\": float(np.median(ape) * 100)})\n",
|
" rows.append(\n",
|
||||||
|
" {\n",
|
||||||
|
" \"Gap (years)\": gap,\n",
|
||||||
|
" \"Method\": name,\n",
|
||||||
|
" \"MdAPE (%)\": float(np.median(ape) * 100),\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"gap_df = pl.DataFrame(rows)\n",
|
"gap_df = pl.DataFrame(rows)\n",
|
||||||
"fig = px.line(\n",
|
"fig = px.line(\n",
|
||||||
" gap_df.to_pandas(), x=\"Gap (years)\", y=\"MdAPE (%)\", color=\"Method\",\n",
|
" gap_df.to_pandas(),\n",
|
||||||
|
" x=\"Gap (years)\",\n",
|
||||||
|
" y=\"MdAPE (%)\",\n",
|
||||||
|
" color=\"Method\",\n",
|
||||||
" title=\"MdAPE by Holding Period (years between input and actual sale)\",\n",
|
" title=\"MdAPE by Holding Period (years between input and actual sale)\",\n",
|
||||||
" markers=True,\n",
|
" markers=True,\n",
|
||||||
")\n",
|
")\n",
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,9 @@
|
||||||
"pl.Config.set_tbl_rows(20)\n",
|
"pl.Config.set_tbl_rows(20)\n",
|
||||||
"pl.Config.set_fmt_str_lengths(80)\n",
|
"pl.Config.set_fmt_str_lengths(80)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"df = pl.read_parquet(\"/volumes/syncthing/Projects/property-map/property-data/rightmove_buy.parquet\")\n",
|
"df = pl.read_parquet(\n",
|
||||||
|
" \"/volumes/syncthing/Projects/property-map/property-data/rightmove_buy.parquet\"\n",
|
||||||
|
")\n",
|
||||||
"schema = df.schema\n",
|
"schema = df.schema\n",
|
||||||
"print(f\"Total rows: {len(df):,}\")\n",
|
"print(f\"Total rows: {len(df):,}\")\n",
|
||||||
"print(f\"Columns ({len(schema)}):\")\n",
|
"print(f\"Columns ({len(schema)}):\")\n",
|
||||||
|
|
@ -150,11 +152,13 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Null counts\n",
|
"# Null counts\n",
|
||||||
"null_df = pl.DataFrame({\n",
|
"null_df = pl.DataFrame(\n",
|
||||||
" \"column\": df.columns,\n",
|
" {\n",
|
||||||
" \"nulls\": [df[c].null_count() for c in df.columns],\n",
|
" \"column\": df.columns,\n",
|
||||||
" \"pct\": [f\"{df[c].null_count()/len(df)*100:.1f}%\" for c in df.columns],\n",
|
" \"nulls\": [df[c].null_count() for c in df.columns],\n",
|
||||||
"})\n",
|
" \"pct\": [f\"{df[c].null_count() / len(df) * 100:.1f}%\" for c in df.columns],\n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
"null_df.filter(pl.col(\"nulls\") > 0)"
|
"null_df.filter(pl.col(\"nulls\") > 0)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -197,13 +201,17 @@
|
||||||
" \"price = 0\": len(df.filter(pl.col(\"price\") == 0)),\n",
|
" \"price = 0\": len(df.filter(pl.col(\"price\") == 0)),\n",
|
||||||
" \"price > 50M\": len(df.filter(pl.col(\"price\") > 50_000_000)),\n",
|
" \"price > 50M\": len(df.filter(pl.col(\"price\") > 50_000_000)),\n",
|
||||||
" \"floorspace > 10,000 sqm\": len(df.filter(pl.col(\"floorspace_sqm\") > 10_000)),\n",
|
" \"floorspace > 10,000 sqm\": len(df.filter(pl.col(\"floorspace_sqm\") > 10_000)),\n",
|
||||||
" \"latitude outside UK (< 49 or > 61)\": len(df.filter((pl.col(\"latitude\") < 49) | (pl.col(\"latitude\") > 61))),\n",
|
" \"latitude outside UK (< 49 or > 61)\": len(\n",
|
||||||
" \"longitude outside UK (< -8 or > 2)\": len(df.filter((pl.col(\"longitude\") < -8) | (pl.col(\"longitude\") > 2))),\n",
|
" df.filter((pl.col(\"latitude\") < 49) | (pl.col(\"latitude\") > 61))\n",
|
||||||
|
" ),\n",
|
||||||
|
" \"longitude outside UK (< -8 or > 2)\": len(\n",
|
||||||
|
" df.filter((pl.col(\"longitude\") < -8) | (pl.col(\"longitude\") > 2))\n",
|
||||||
|
" ),\n",
|
||||||
" \"house_share = true\": len(df.filter(pl.col(\"house_share\"))),\n",
|
" \"house_share = true\": len(df.filter(pl.col(\"house_share\"))),\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"print(\"Data quality issues:\")\n",
|
"print(\"Data quality issues:\")\n",
|
||||||
"for desc, count in issues.items():\n",
|
"for desc, count in issues.items():\n",
|
||||||
" print(f\" {desc}: {count:,} ({count/len(df)*100:.2f}%)\")"
|
" print(f\" {desc}: {count:,} ({count / len(df) * 100:.2f}%)\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -230,7 +238,7 @@
|
||||||
" & (pl.col(\"longitude\") >= -8)\n",
|
" & (pl.col(\"longitude\") >= -8)\n",
|
||||||
" & (pl.col(\"longitude\") <= 2)\n",
|
" & (pl.col(\"longitude\") <= 2)\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(f\"Clean rows: {len(clean):,} ({len(clean)/len(df)*100:.1f}% of original)\")"
|
"print(f\"Clean rows: {len(clean):,} ({len(clean) / len(df) * 100:.1f}% of original)\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -1126,8 +1134,12 @@
|
||||||
"# Price histogram (clipped to 2nd-98th percentile)\n",
|
"# Price histogram (clipped to 2nd-98th percentile)\n",
|
||||||
"lo, hi = price.quantile(0.02), price.quantile(0.98)\n",
|
"lo, hi = price.quantile(0.02), price.quantile(0.98)\n",
|
||||||
"clipped = clean.filter((pl.col(\"price\") >= lo) & (pl.col(\"price\") <= hi))\n",
|
"clipped = clean.filter((pl.col(\"price\") >= lo) & (pl.col(\"price\") <= hi))\n",
|
||||||
"fig = px.histogram(clipped.to_pandas(), x=\"price\", nbins=80,\n",
|
"fig = px.histogram(\n",
|
||||||
" title=f\"Asking Price Distribution (£{lo:,.0f} - £{hi:,.0f}, 2nd-98th pctl)\")\n",
|
" clipped.to_pandas(),\n",
|
||||||
|
" x=\"price\",\n",
|
||||||
|
" nbins=80,\n",
|
||||||
|
" title=f\"Asking Price Distribution (£{lo:,.0f} - £{hi:,.0f}, 2nd-98th pctl)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400, xaxis_title=\"Asking Price (£)\", yaxis_title=\"Count\")\n",
|
"fig.update_layout(height=400, xaxis_title=\"Asking Price (£)\", yaxis_title=\"Count\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -439978,9 +439990,13 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Price by property type\n",
|
"# Price by property type\n",
|
||||||
"fig = px.box(clean.filter(pl.col(\"price\") <= 2_000_000).to_pandas(),\n",
|
"fig = px.box(\n",
|
||||||
" x=\"property_type\", y=\"price\", color=\"property_type\",\n",
|
" clean.filter(pl.col(\"price\") <= 2_000_000).to_pandas(),\n",
|
||||||
" title=\"Price by Property Type (capped at £2M for readability)\")\n",
|
" x=\"property_type\",\n",
|
||||||
|
" y=\"price\",\n",
|
||||||
|
" color=\"property_type\",\n",
|
||||||
|
" title=\"Price by Property Type (capped at £2M for readability)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=500, showlegend=False, yaxis_title=\"Price (£)\")\n",
|
"fig.update_layout(height=500, showlegend=False, yaxis_title=\"Price (£)\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -440079,9 +440095,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Price qualifier breakdown\n",
|
"# Price qualifier breakdown\n",
|
||||||
"pq = clean[\"price_qualifier\"].value_counts().sort(\"count\", descending=True)\n",
|
"pq = clean[\"price_qualifier\"].value_counts().sort(\"count\", descending=True)\n",
|
||||||
"pq = pq.with_columns(\n",
|
"pq = pq.with_columns((pl.col(\"count\") / pl.col(\"count\").sum() * 100).alias(\"pct\"))\n",
|
||||||
" (pl.col(\"count\") / pl.col(\"count\").sum() * 100).alias(\"pct\")\n",
|
|
||||||
")\n",
|
|
||||||
"pq"
|
"pq"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -440928,8 +440942,12 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Property type distribution\n",
|
"# Property type distribution\n",
|
||||||
"type_counts = clean[\"property_type\"].value_counts().sort(\"count\", descending=True)\n",
|
"type_counts = clean[\"property_type\"].value_counts().sort(\"count\", descending=True)\n",
|
||||||
"fig = px.pie(type_counts.to_pandas(), names=\"property_type\", values=\"count\",\n",
|
"fig = px.pie(\n",
|
||||||
" title=\"Property Type Distribution\")\n",
|
" type_counts.to_pandas(),\n",
|
||||||
|
" names=\"property_type\",\n",
|
||||||
|
" values=\"count\",\n",
|
||||||
|
" title=\"Property Type Distribution\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400)\n",
|
"fig.update_layout(height=400)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -441805,9 +441823,16 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Top 20 sub-types\n",
|
"# Top 20 sub-types\n",
|
||||||
"sub_counts = clean[\"property_sub_type\"].value_counts().sort(\"count\", descending=True).head(20)\n",
|
"sub_counts = (\n",
|
||||||
"fig = px.bar(sub_counts.to_pandas(), x=\"count\", y=\"property_sub_type\", orientation=\"h\",\n",
|
" clean[\"property_sub_type\"].value_counts().sort(\"count\", descending=True).head(20)\n",
|
||||||
" title=\"Top 20 Property Sub-types\")\n",
|
")\n",
|
||||||
|
"fig = px.bar(\n",
|
||||||
|
" sub_counts.to_pandas(),\n",
|
||||||
|
" x=\"count\",\n",
|
||||||
|
" y=\"property_sub_type\",\n",
|
||||||
|
" orientation=\"h\",\n",
|
||||||
|
" title=\"Top 20 Property Sub-types\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=600, yaxis={\"categoryorder\": \"total ascending\"})\n",
|
"fig.update_layout(height=600, yaxis={\"categoryorder\": \"total ascending\"})\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -442643,9 +442668,15 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Tenure split\n",
|
"# Tenure split\n",
|
||||||
"tenure_counts = clean[\"tenure\"].drop_nulls().value_counts().sort(\"count\", descending=True)\n",
|
"tenure_counts = (\n",
|
||||||
"fig = px.pie(tenure_counts.to_pandas(), names=\"tenure\", values=\"count\",\n",
|
" clean[\"tenure\"].drop_nulls().value_counts().sort(\"count\", descending=True)\n",
|
||||||
" title=f\"Tenure Split ({clean['tenure'].null_count():,} unknown / {clean['tenure'].null_count()/len(clean)*100:.1f}% missing)\")\n",
|
")\n",
|
||||||
|
"fig = px.pie(\n",
|
||||||
|
" tenure_counts.to_pandas(),\n",
|
||||||
|
" names=\"tenure\",\n",
|
||||||
|
" values=\"count\",\n",
|
||||||
|
" title=f\"Tenure Split ({clean['tenure'].null_count():,} unknown / {clean['tenure'].null_count() / len(clean) * 100:.1f}% missing)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400)\n",
|
"fig.update_layout(height=400)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -443546,8 +443577,14 @@
|
||||||
" .agg(pl.len().alias(\"count\"))\n",
|
" .agg(pl.len().alias(\"count\"))\n",
|
||||||
" .sort(\"property_type\")\n",
|
" .sort(\"property_type\")\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig = px.bar(tenure_by_type.to_pandas(), x=\"property_type\", y=\"count\", color=\"tenure\",\n",
|
"fig = px.bar(\n",
|
||||||
" barmode=\"group\", title=\"Tenure by Property Type\")\n",
|
" tenure_by_type.to_pandas(),\n",
|
||||||
|
" x=\"property_type\",\n",
|
||||||
|
" y=\"count\",\n",
|
||||||
|
" color=\"tenure\",\n",
|
||||||
|
" barmode=\"group\",\n",
|
||||||
|
" title=\"Tenure by Property Type\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400)\n",
|
"fig.update_layout(height=400)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -444412,9 +444449,12 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Bedroom distribution\n",
|
"# Bedroom distribution\n",
|
||||||
"bed_counts = clean.filter(pl.col(\"bedrooms\") <= 10)[\"bedrooms\"].value_counts().sort(\"bedrooms\")\n",
|
"bed_counts = (\n",
|
||||||
"fig = px.bar(bed_counts.to_pandas(), x=\"bedrooms\", y=\"count\",\n",
|
" clean.filter(pl.col(\"bedrooms\") <= 10)[\"bedrooms\"].value_counts().sort(\"bedrooms\")\n",
|
||||||
" title=\"Bedroom Count Distribution\")\n",
|
")\n",
|
||||||
|
"fig = px.bar(\n",
|
||||||
|
" bed_counts.to_pandas(), x=\"bedrooms\", y=\"count\", title=\"Bedroom Count Distribution\"\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400)\n",
|
"fig.update_layout(height=400)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -445279,16 +445319,25 @@
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = go.Figure()\n",
|
"fig = go.Figure()\n",
|
||||||
"fig.add_trace(go.Bar(\n",
|
"fig.add_trace(\n",
|
||||||
" x=price_by_beds[\"bedrooms\"], y=price_by_beds[\"median_price\"],\n",
|
" go.Bar(\n",
|
||||||
" name=\"Median\", error_y=dict(type=\"data\",\n",
|
" x=price_by_beds[\"bedrooms\"],\n",
|
||||||
" symmetric=False,\n",
|
" y=price_by_beds[\"median_price\"],\n",
|
||||||
" array=(price_by_beds[\"p75\"] - price_by_beds[\"median_price\"]).to_list(),\n",
|
" name=\"Median\",\n",
|
||||||
" arrayminus=(price_by_beds[\"median_price\"] - price_by_beds[\"p25\"]).to_list()\n",
|
" error_y=dict(\n",
|
||||||
|
" type=\"data\",\n",
|
||||||
|
" symmetric=False,\n",
|
||||||
|
" array=(price_by_beds[\"p75\"] - price_by_beds[\"median_price\"]).to_list(),\n",
|
||||||
|
" arrayminus=(price_by_beds[\"median_price\"] - price_by_beds[\"p25\"]).to_list(),\n",
|
||||||
|
" ),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"))\n",
|
")\n",
|
||||||
"fig.update_layout(title=\"Median Price by Bedrooms (with IQR)\", height=400,\n",
|
"fig.update_layout(\n",
|
||||||
" xaxis_title=\"Bedrooms\", yaxis_title=\"Price (£)\")\n",
|
" title=\"Median Price by Bedrooms (with IQR)\",\n",
|
||||||
|
" height=400,\n",
|
||||||
|
" xaxis_title=\"Bedrooms\",\n",
|
||||||
|
" yaxis_title=\"Price (£)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -446263,8 +446312,14 @@
|
||||||
" .agg(pl.len().alias(\"count\"))\n",
|
" .agg(pl.len().alias(\"count\"))\n",
|
||||||
" .sort(\"property_type\", \"bedrooms\")\n",
|
" .sort(\"property_type\", \"bedrooms\")\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig = px.bar(beds_by_type.to_pandas(), x=\"bedrooms\", y=\"count\", color=\"property_type\",\n",
|
"fig = px.bar(\n",
|
||||||
" barmode=\"group\", title=\"Bedroom Distribution by Property Type\")\n",
|
" beds_by_type.to_pandas(),\n",
|
||||||
|
" x=\"bedrooms\",\n",
|
||||||
|
" y=\"count\",\n",
|
||||||
|
" color=\"property_type\",\n",
|
||||||
|
" barmode=\"group\",\n",
|
||||||
|
" title=\"Bedroom Distribution by Property Type\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=450)\n",
|
"fig.update_layout(height=450)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -446323,19 +446378,26 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Floorspace availability by property type\n",
|
"# Floorspace availability by property type\n",
|
||||||
"has_floor = clean.with_columns(pl.col(\"floorspace_sqm\").is_not_null().alias(\"has_floorspace\"))\n",
|
"has_floor = clean.with_columns(\n",
|
||||||
"floor_by_type = (\n",
|
" pl.col(\"floorspace_sqm\").is_not_null().alias(\"has_floorspace\")\n",
|
||||||
" has_floor.group_by(\"property_type\", \"has_floorspace\")\n",
|
")\n",
|
||||||
" .agg(pl.len().alias(\"count\"))\n",
|
"floor_by_type = has_floor.group_by(\"property_type\", \"has_floorspace\").agg(\n",
|
||||||
|
" pl.len().alias(\"count\")\n",
|
||||||
|
")\n",
|
||||||
|
"totals = floor_by_type.group_by(\"property_type\").agg(\n",
|
||||||
|
" pl.col(\"count\").sum().alias(\"total\")\n",
|
||||||
")\n",
|
")\n",
|
||||||
"totals = floor_by_type.group_by(\"property_type\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
|
|
||||||
"floor_pct = (\n",
|
"floor_pct = (\n",
|
||||||
" floor_by_type.filter(pl.col(\"has_floorspace\"))\n",
|
" floor_by_type.filter(pl.col(\"has_floorspace\"))\n",
|
||||||
" .join(totals, on=\"property_type\")\n",
|
" .join(totals, on=\"property_type\")\n",
|
||||||
" .with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"pct_with_floorspace\"))\n",
|
" .with_columns(\n",
|
||||||
|
" (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"pct_with_floorspace\")\n",
|
||||||
|
" )\n",
|
||||||
" .sort(\"pct_with_floorspace\", descending=True)\n",
|
" .sort(\"pct_with_floorspace\", descending=True)\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(f\"Overall floorspace availability: {clean['floorspace_sqm'].drop_nulls().len():,} / {len(clean):,} ({clean['floorspace_sqm'].drop_nulls().len()/len(clean)*100:.1f}%)\")\n",
|
"print(\n",
|
||||||
|
" f\"Overall floorspace availability: {clean['floorspace_sqm'].drop_nulls().len():,} / {len(clean):,} ({clean['floorspace_sqm'].drop_nulls().len() / len(clean) * 100:.1f}%)\"\n",
|
||||||
|
")\n",
|
||||||
"floor_pct.select(\"property_type\", \"count\", \"total\", \"pct_with_floorspace\")"
|
"floor_pct.select(\"property_type\", \"count\", \"total\", \"pct_with_floorspace\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -447298,8 +447360,13 @@
|
||||||
")\n",
|
")\n",
|
||||||
"print(f\"Properties with reasonable floorspace (10-1000 sqm): {len(with_floor):,}\")\n",
|
"print(f\"Properties with reasonable floorspace (10-1000 sqm): {len(with_floor):,}\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.histogram(with_floor.to_pandas(), x=\"floorspace_sqm\", nbins=80, color=\"property_type\",\n",
|
"fig = px.histogram(\n",
|
||||||
" title=\"Floorspace Distribution by Property Type\")\n",
|
" with_floor.to_pandas(),\n",
|
||||||
|
" x=\"floorspace_sqm\",\n",
|
||||||
|
" nbins=80,\n",
|
||||||
|
" color=\"property_type\",\n",
|
||||||
|
" title=\"Floorspace Distribution by Property Type\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=450, xaxis_title=\"Floorspace (sqm)\", barmode=\"overlay\")\n",
|
"fig.update_layout(height=450, xaxis_title=\"Floorspace (sqm)\", barmode=\"overlay\")\n",
|
||||||
"fig.update_traces(opacity=0.6)\n",
|
"fig.update_traces(opacity=0.6)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
|
|
@ -448176,8 +448243,12 @@
|
||||||
"print(f\" P25: £{s.quantile(0.25):,.0f}/sqm\")\n",
|
"print(f\" P25: £{s.quantile(0.25):,.0f}/sqm\")\n",
|
||||||
"print(f\" P75: £{s.quantile(0.75):,.0f}/sqm\")\n",
|
"print(f\" P75: £{s.quantile(0.75):,.0f}/sqm\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.histogram(ppsqm.to_pandas(), x=\"price_per_sqm\", nbins=80,\n",
|
"fig = px.histogram(\n",
|
||||||
" title=\"Price per Square Metre Distribution\")\n",
|
" ppsqm.to_pandas(),\n",
|
||||||
|
" x=\"price_per_sqm\",\n",
|
||||||
|
" nbins=80,\n",
|
||||||
|
" title=\"Price per Square Metre Distribution\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400, xaxis_title=\"Price per sqm (£)\")\n",
|
"fig.update_layout(height=400, xaxis_title=\"Price per sqm (£)\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -584906,8 +584977,13 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"fig = px.box(ppsqm.to_pandas(), x=\"property_type\", y=\"price_per_sqm\", color=\"property_type\",\n",
|
"fig = px.box(\n",
|
||||||
" title=\"Price per sqm by Property Type\")\n",
|
" ppsqm.to_pandas(),\n",
|
||||||
|
" x=\"property_type\",\n",
|
||||||
|
" y=\"price_per_sqm\",\n",
|
||||||
|
" color=\"property_type\",\n",
|
||||||
|
" title=\"Price per sqm by Property Type\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=450, showlegend=False, yaxis_title=\"£ per sqm\")\n",
|
"fig.update_layout(height=450, showlegend=False, yaxis_title=\"£ per sqm\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -585865,9 +585941,15 @@
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"top30 = outcode_stats.head(30)\n",
|
"top30 = outcode_stats.head(30)\n",
|
||||||
"fig = px.bar(top30.to_pandas(), x=\"count\", y=\"outcode\", orientation=\"h\",\n",
|
"fig = px.bar(\n",
|
||||||
" color=\"median_price\", color_continuous_scale=\"Viridis\",\n",
|
" top30.to_pandas(),\n",
|
||||||
" title=\"Top 30 Outcodes by Listing Volume\")\n",
|
" x=\"count\",\n",
|
||||||
|
" y=\"outcode\",\n",
|
||||||
|
" orientation=\"h\",\n",
|
||||||
|
" color=\"median_price\",\n",
|
||||||
|
" color_continuous_scale=\"Viridis\",\n",
|
||||||
|
" title=\"Top 30 Outcodes by Listing Volume\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"})\n",
|
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"})\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -587400,11 +587482,25 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Most expensive outcodes (min 50 listings)\n",
|
"# Most expensive outcodes (min 50 listings)\n",
|
||||||
"expensive = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\", descending=True).head(30)\n",
|
"expensive = (\n",
|
||||||
"fig = px.bar(expensive.to_pandas(), x=\"median_price\", y=\"outcode\", orientation=\"h\",\n",
|
" outcode_stats.filter(pl.col(\"count\") >= 50)\n",
|
||||||
" color=\"count\", color_continuous_scale=\"Blues\",\n",
|
" .sort(\"median_price\", descending=True)\n",
|
||||||
" title=\"Top 30 Most Expensive Outcodes (min 50 listings, by median price)\")\n",
|
" .head(30)\n",
|
||||||
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total ascending\"}, xaxis_title=\"Median Price (£)\")\n",
|
")\n",
|
||||||
|
"fig = px.bar(\n",
|
||||||
|
" expensive.to_pandas(),\n",
|
||||||
|
" x=\"median_price\",\n",
|
||||||
|
" y=\"outcode\",\n",
|
||||||
|
" orientation=\"h\",\n",
|
||||||
|
" color=\"count\",\n",
|
||||||
|
" color_continuous_scale=\"Blues\",\n",
|
||||||
|
" title=\"Top 30 Most Expensive Outcodes (min 50 listings, by median price)\",\n",
|
||||||
|
")\n",
|
||||||
|
"fig.update_layout(\n",
|
||||||
|
" height=700,\n",
|
||||||
|
" yaxis={\"categoryorder\": \"total ascending\"},\n",
|
||||||
|
" xaxis_title=\"Median Price (£)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -588914,10 +589010,20 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Cheapest outcodes (min 50 listings)\n",
|
"# Cheapest outcodes (min 50 listings)\n",
|
||||||
"cheapest = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\").head(30)\n",
|
"cheapest = outcode_stats.filter(pl.col(\"count\") >= 50).sort(\"median_price\").head(30)\n",
|
||||||
"fig = px.bar(cheapest.to_pandas(), x=\"median_price\", y=\"outcode\", orientation=\"h\",\n",
|
"fig = px.bar(\n",
|
||||||
" color=\"count\", color_continuous_scale=\"Blues\",\n",
|
" cheapest.to_pandas(),\n",
|
||||||
" title=\"Top 30 Cheapest Outcodes (min 50 listings, by median price)\")\n",
|
" x=\"median_price\",\n",
|
||||||
"fig.update_layout(height=700, yaxis={\"categoryorder\": \"total descending\"}, xaxis_title=\"Median Price (£)\")\n",
|
" y=\"outcode\",\n",
|
||||||
|
" orientation=\"h\",\n",
|
||||||
|
" color=\"count\",\n",
|
||||||
|
" color_continuous_scale=\"Blues\",\n",
|
||||||
|
" title=\"Top 30 Cheapest Outcodes (min 50 listings, by median price)\",\n",
|
||||||
|
")\n",
|
||||||
|
"fig.update_layout(\n",
|
||||||
|
" height=700,\n",
|
||||||
|
" yaxis={\"categoryorder\": \"total descending\"},\n",
|
||||||
|
" xaxis_title=\"Median Price (£)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -589828,14 +589934,19 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Geographic scatter of listings (sample for performance)\n",
|
"# Geographic scatter of listings (sample for performance)\n",
|
||||||
"sample = clean.sample(n=min(20_000, len(clean)), seed=42)\n",
|
"sample = clean.sample(n=min(20_000, len(clean)), seed=42)\n",
|
||||||
"fig = px.scatter_map(sample.to_pandas(),\n",
|
"fig = px.scatter_map(\n",
|
||||||
" lat=\"latitude\", lon=\"longitude\",\n",
|
" sample.to_pandas(),\n",
|
||||||
" color=\"price\", size_max=4,\n",
|
" lat=\"latitude\",\n",
|
||||||
" color_continuous_scale=\"Viridis\",\n",
|
" lon=\"longitude\",\n",
|
||||||
" range_color=[100_000, 1_500_000],\n",
|
" color=\"price\",\n",
|
||||||
" zoom=5, center={\"lat\": 52.5, \"lon\": -1.5},\n",
|
" size_max=4,\n",
|
||||||
" title=\"Listing Locations (20k sample, colored by price)\",\n",
|
" color_continuous_scale=\"Viridis\",\n",
|
||||||
" opacity=0.4)\n",
|
" range_color=[100_000, 1_500_000],\n",
|
||||||
|
" zoom=5,\n",
|
||||||
|
" center={\"lat\": 52.5, \"lon\": -1.5},\n",
|
||||||
|
" title=\"Listing Locations (20k sample, colored by price)\",\n",
|
||||||
|
" opacity=0.4,\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=700)\n",
|
"fig.update_layout(height=700)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -589864,7 +589975,9 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Parse dates and look at listing age\n",
|
"# Parse dates and look at listing age\n",
|
||||||
"with_dates = clean.with_columns(\n",
|
"with_dates = clean.with_columns(\n",
|
||||||
" pl.col(\"first_visible_date\").str.to_datetime(\"%Y-%m-%dT%H:%M:%SZ\").alias(\"listed_at\"),\n",
|
" pl.col(\"first_visible_date\")\n",
|
||||||
|
" .str.to_datetime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
|
||||||
|
" .alias(\"listed_at\"),\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(f\"Date range: {with_dates['listed_at'].min()} to {with_dates['listed_at'].max()}\")"
|
"print(f\"Date range: {with_dates['listed_at'].min()} to {with_dates['listed_at'].max()}\")"
|
||||||
|
|
@ -590856,8 +590969,9 @@
|
||||||
" .sort(\"month\")\n",
|
" .sort(\"month\")\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.bar(monthly.to_pandas(), x=\"month\", y=\"count\",\n",
|
"fig = px.bar(\n",
|
||||||
" title=\"Listings by Month Listed\")\n",
|
" monthly.to_pandas(), x=\"month\", y=\"count\", title=\"Listings by Month Listed\"\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400, xaxis_title=\"Month\", yaxis_title=\"Listings\")\n",
|
"fig.update_layout(height=400, xaxis_title=\"Month\", yaxis_title=\"Listings\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -590884,6 +590998,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# How old are current listings? (days since first visible)\n",
|
"# How old are current listings? (days since first visible)\n",
|
||||||
"import datetime\n",
|
"import datetime\n",
|
||||||
|
"\n",
|
||||||
"now = datetime.datetime(2026, 2, 14)\n",
|
"now = datetime.datetime(2026, 2, 14)\n",
|
||||||
"with_age = with_dates.with_columns(\n",
|
"with_age = with_dates.with_columns(\n",
|
||||||
" ((pl.lit(now) - pl.col(\"listed_at\")).dt.total_days()).alias(\"days_on_market\")\n",
|
" ((pl.lit(now) - pl.col(\"listed_at\")).dt.total_days()).alias(\"days_on_market\")\n",
|
||||||
|
|
@ -590896,7 +591011,7 @@
|
||||||
"print(f\" P25: {age.quantile(0.25):.0f} days\")\n",
|
"print(f\" P25: {age.quantile(0.25):.0f} days\")\n",
|
||||||
"print(f\" P75: {age.quantile(0.75):.0f} days\")\n",
|
"print(f\" P75: {age.quantile(0.75):.0f} days\")\n",
|
||||||
"print(f\" P95: {age.quantile(0.95):.0f} days\")\n",
|
"print(f\" P95: {age.quantile(0.95):.0f} days\")\n",
|
||||||
"print(f\" Max: {age.max():.0f} days ({age.max()/365:.1f} years)\")"
|
"print(f\" Max: {age.max():.0f} days ({age.max() / 365:.1f} years)\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -591749,8 +591864,12 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Days on market distribution (cap at 2 years for readability)\n",
|
"# Days on market distribution (cap at 2 years for readability)\n",
|
||||||
"capped = with_age.filter(pl.col(\"days_on_market\") <= 730)\n",
|
"capped = with_age.filter(pl.col(\"days_on_market\") <= 730)\n",
|
||||||
"fig = px.histogram(capped.to_pandas(), x=\"days_on_market\", nbins=100,\n",
|
"fig = px.histogram(\n",
|
||||||
" title=\"Days on Market Distribution (capped at 2 years)\")\n",
|
" capped.to_pandas(),\n",
|
||||||
|
" x=\"days_on_market\",\n",
|
||||||
|
" nbins=100,\n",
|
||||||
|
" title=\"Days on Market Distribution (capped at 2 years)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=400, xaxis_title=\"Days on Market\", yaxis_title=\"Count\")\n",
|
"fig.update_layout(height=400, xaxis_title=\"Days on Market\", yaxis_title=\"Count\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -591883,11 +592002,13 @@
|
||||||
"# Explode features list and count most common\n",
|
"# Explode features list and count most common\n",
|
||||||
"features_exploded = clean.select(\"features\").explode(\"features\").drop_nulls()\n",
|
"features_exploded = clean.select(\"features\").explode(\"features\").drop_nulls()\n",
|
||||||
"print(f\"Total feature entries: {len(features_exploded):,}\")\n",
|
"print(f\"Total feature entries: {len(features_exploded):,}\")\n",
|
||||||
"print(f\"Features per listing: {len(features_exploded)/len(clean):.1f} avg\")\n",
|
"print(f\"Features per listing: {len(features_exploded) / len(clean):.1f} avg\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Most common features (lowercased for grouping)\n",
|
"# Most common features (lowercased for grouping)\n",
|
||||||
"feature_counts = (\n",
|
"feature_counts = (\n",
|
||||||
" features_exploded.with_columns(pl.col(\"features\").str.to_lowercase().str.strip_chars().alias(\"feature_lower\"))\n",
|
" features_exploded.with_columns(\n",
|
||||||
|
" pl.col(\"features\").str.to_lowercase().str.strip_chars().alias(\"feature_lower\")\n",
|
||||||
|
" )\n",
|
||||||
" .group_by(\"feature_lower\")\n",
|
" .group_by(\"feature_lower\")\n",
|
||||||
" .agg(pl.len().alias(\"count\"))\n",
|
" .agg(pl.len().alias(\"count\"))\n",
|
||||||
" .sort(\"count\", descending=True)\n",
|
" .sort(\"count\", descending=True)\n",
|
||||||
|
|
@ -592794,16 +592915,64 @@
|
||||||
"all_features = features_exploded[\"features\"].to_list()\n",
|
"all_features = features_exploded[\"features\"].to_list()\n",
|
||||||
"word_counter = Counter()\n",
|
"word_counter = Counter()\n",
|
||||||
"for feat in all_features:\n",
|
"for feat in all_features:\n",
|
||||||
" words = re.findall(r'[a-z]+', feat.lower())\n",
|
" words = re.findall(r\"[a-z]+\", feat.lower())\n",
|
||||||
" word_counter.update(words)\n",
|
" word_counter.update(words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Filter out very short/common words\n",
|
"# Filter out very short/common words\n",
|
||||||
"stop_words = {'the', 'a', 'an', 'and', 'or', 'of', 'to', 'in', 'with', 'for', 'on', 'at', 'by', 'is', 'it', 'from', 'as', 'be', 'this', 'that', 'are', 'was', 'has', 'have', 'not', 'but', 'all', 'can', 'had', 'her', 'his', 'one', 'our', 'out', 'you', 'will'}\n",
|
"stop_words = {\n",
|
||||||
"keywords = [(w, c) for w, c in word_counter.most_common(100) if w not in stop_words and len(w) > 2]\n",
|
" \"the\",\n",
|
||||||
"kw_df = pl.DataFrame({\"word\": [w for w,c in keywords[:40]], \"count\": [c for w,c in keywords[:40]]})\n",
|
" \"a\",\n",
|
||||||
|
" \"an\",\n",
|
||||||
|
" \"and\",\n",
|
||||||
|
" \"or\",\n",
|
||||||
|
" \"of\",\n",
|
||||||
|
" \"to\",\n",
|
||||||
|
" \"in\",\n",
|
||||||
|
" \"with\",\n",
|
||||||
|
" \"for\",\n",
|
||||||
|
" \"on\",\n",
|
||||||
|
" \"at\",\n",
|
||||||
|
" \"by\",\n",
|
||||||
|
" \"is\",\n",
|
||||||
|
" \"it\",\n",
|
||||||
|
" \"from\",\n",
|
||||||
|
" \"as\",\n",
|
||||||
|
" \"be\",\n",
|
||||||
|
" \"this\",\n",
|
||||||
|
" \"that\",\n",
|
||||||
|
" \"are\",\n",
|
||||||
|
" \"was\",\n",
|
||||||
|
" \"has\",\n",
|
||||||
|
" \"have\",\n",
|
||||||
|
" \"not\",\n",
|
||||||
|
" \"but\",\n",
|
||||||
|
" \"all\",\n",
|
||||||
|
" \"can\",\n",
|
||||||
|
" \"had\",\n",
|
||||||
|
" \"her\",\n",
|
||||||
|
" \"his\",\n",
|
||||||
|
" \"one\",\n",
|
||||||
|
" \"our\",\n",
|
||||||
|
" \"out\",\n",
|
||||||
|
" \"you\",\n",
|
||||||
|
" \"will\",\n",
|
||||||
|
"}\n",
|
||||||
|
"keywords = [\n",
|
||||||
|
" (w, c)\n",
|
||||||
|
" for w, c in word_counter.most_common(100)\n",
|
||||||
|
" if w not in stop_words and len(w) > 2\n",
|
||||||
|
"]\n",
|
||||||
|
"kw_df = pl.DataFrame(\n",
|
||||||
|
" {\"word\": [w for w, c in keywords[:40]], \"count\": [c for w, c in keywords[:40]]}\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.bar(kw_df.to_pandas(), x=\"count\", y=\"word\", orientation=\"h\",\n",
|
"fig = px.bar(\n",
|
||||||
" title=\"Most Common Words in Feature Descriptions\")\n",
|
" kw_df.to_pandas(),\n",
|
||||||
|
" x=\"count\",\n",
|
||||||
|
" y=\"word\",\n",
|
||||||
|
" orientation=\"h\",\n",
|
||||||
|
" title=\"Most Common Words in Feature Descriptions\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=800, yaxis={\"categoryorder\": \"total ascending\"})\n",
|
"fig.update_layout(height=800, yaxis={\"categoryorder\": \"total ascending\"})\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -593767,9 +593936,14 @@
|
||||||
" & (pl.col(\"price\") < 3_000_000)\n",
|
" & (pl.col(\"price\") < 3_000_000)\n",
|
||||||
").sample(n=min(15_000, len(with_floor)), seed=42)\n",
|
").sample(n=min(15_000, len(with_floor)), seed=42)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.scatter(scatter_df.to_pandas(), x=\"floorspace_sqm\", y=\"price\",\n",
|
"fig = px.scatter(\n",
|
||||||
" color=\"property_type\", opacity=0.3,\n",
|
" scatter_df.to_pandas(),\n",
|
||||||
" title=\"Price vs Floorspace (sample, capped at £3M / 500sqm)\")\n",
|
" x=\"floorspace_sqm\",\n",
|
||||||
|
" y=\"price\",\n",
|
||||||
|
" color=\"property_type\",\n",
|
||||||
|
" opacity=0.3,\n",
|
||||||
|
" title=\"Price vs Floorspace (sample, capped at £3M / 500sqm)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=600, xaxis_title=\"Floorspace (sqm)\", yaxis_title=\"Price (£)\")\n",
|
"fig.update_layout(height=600, xaxis_title=\"Floorspace (sqm)\", yaxis_title=\"Price (£)\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -594739,8 +594913,14 @@
|
||||||
" .agg(pl.col(\"price\").median().alias(\"median_price\"), pl.len().alias(\"count\"))\n",
|
" .agg(pl.col(\"price\").median().alias(\"median_price\"), pl.len().alias(\"count\"))\n",
|
||||||
" .sort(\"property_type\", \"bedrooms\")\n",
|
" .sort(\"property_type\", \"bedrooms\")\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig = px.line(bp.to_pandas(), x=\"bedrooms\", y=\"median_price\", color=\"property_type\",\n",
|
"fig = px.line(\n",
|
||||||
" markers=True, title=\"Median Price by Bedrooms and Property Type\")\n",
|
" bp.to_pandas(),\n",
|
||||||
|
" x=\"bedrooms\",\n",
|
||||||
|
" y=\"median_price\",\n",
|
||||||
|
" color=\"property_type\",\n",
|
||||||
|
" markers=True,\n",
|
||||||
|
" title=\"Median Price by Bedrooms and Property Type\",\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(height=450, xaxis_title=\"Bedrooms\", yaxis_title=\"Median Price (£)\")\n",
|
"fig.update_layout(height=450, xaxis_title=\"Bedrooms\", yaxis_title=\"Median Price (£)\")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -594789,18 +594969,28 @@
|
||||||
"print(f\"Total listings: {len(clean):,}\")\n",
|
"print(f\"Total listings: {len(clean):,}\")\n",
|
||||||
"print(f\"Outcodes covered: {clean['outcode'].n_unique():,}\")\n",
|
"print(f\"Outcodes covered: {clean['outcode'].n_unique():,}\")\n",
|
||||||
"print(\"\")\n",
|
"print(\"\")\n",
|
||||||
"print(f\"Price: median £{clean['price'].median():,.0f}, mean £{clean['price'].mean():,.0f}\")\n",
|
"print(\n",
|
||||||
"print(f\"Bedrooms: median {clean['bedrooms'].median():.0f}, mean {clean['bedrooms'].mean():.1f}\")\n",
|
" f\"Price: median £{clean['price'].median():,.0f}, mean £{clean['price'].mean():,.0f}\"\n",
|
||||||
|
")\n",
|
||||||
|
"print(\n",
|
||||||
|
" f\"Bedrooms: median {clean['bedrooms'].median():.0f}, mean {clean['bedrooms'].mean():.1f}\"\n",
|
||||||
|
")\n",
|
||||||
"print(\"\")\n",
|
"print(\"\")\n",
|
||||||
"print(f\"Tenure known: {(len(clean) - clean['tenure'].null_count())/len(clean)*100:.1f}%\")\n",
|
"print(\n",
|
||||||
|
" f\"Tenure known: {(len(clean) - clean['tenure'].null_count()) / len(clean) * 100:.1f}%\"\n",
|
||||||
|
")\n",
|
||||||
"print(f\" Freehold: {len(clean.filter(pl.col('tenure') == 'Freehold')):,}\")\n",
|
"print(f\" Freehold: {len(clean.filter(pl.col('tenure') == 'Freehold')):,}\")\n",
|
||||||
"print(f\" Leasehold: {len(clean.filter(pl.col('tenure') == 'Leasehold')):,}\")\n",
|
"print(f\" Leasehold: {len(clean.filter(pl.col('tenure') == 'Leasehold')):,}\")\n",
|
||||||
"print(\"\")\n",
|
"print(\"\")\n",
|
||||||
"print(f\"Floorspace available: {clean['floorspace_sqm'].drop_nulls().len()/len(clean)*100:.1f}%\")\n",
|
"print(\n",
|
||||||
|
" f\"Floorspace available: {clean['floorspace_sqm'].drop_nulls().len() / len(clean) * 100:.1f}%\"\n",
|
||||||
|
")\n",
|
||||||
"print(\"\")\n",
|
"print(\"\")\n",
|
||||||
"print(\"Property types:\")\n",
|
"print(\"Property types:\")\n",
|
||||||
"for row in clean['property_type'].value_counts().sort('count', descending=True).iter_rows():\n",
|
"for row in (\n",
|
||||||
" print(f\" {row[0]}: {row[1]:,} ({row[1]/len(clean)*100:.1f}%)\")"
|
" clean[\"property_type\"].value_counts().sort(\"count\", descending=True).iter_rows()\n",
|
||||||
|
"):\n",
|
||||||
|
" print(f\" {row[0]}: {row[1]:,} ({row[1] / len(clean) * 100:.1f}%)\")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@
|
||||||
"buy = pl.read_parquet(f\"{DATA}/online_listings_buy.parquet\")\n",
|
"buy = pl.read_parquet(f\"{DATA}/online_listings_buy.parquet\")\n",
|
||||||
"rent = pl.read_parquet(f\"{DATA}/online_listings_rent.parquet\")\n",
|
"rent = pl.read_parquet(f\"{DATA}/online_listings_rent.parquet\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"def tag_source(df: pl.DataFrame) -> pl.DataFrame:\n",
|
"def tag_source(df: pl.DataFrame) -> pl.DataFrame:\n",
|
||||||
" return df.with_columns(\n",
|
" return df.with_columns(\n",
|
||||||
" pl.when(pl.col(\"Listing URL\").str.contains(\"rightmove\"))\n",
|
" pl.when(pl.col(\"Listing URL\").str.contains(\"rightmove\"))\n",
|
||||||
|
|
@ -62,6 +63,7 @@
|
||||||
" .alias(\"source\")\n",
|
" .alias(\"source\")\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"buy = tag_source(buy)\n",
|
"buy = tag_source(buy)\n",
|
||||||
"rent = tag_source(rent)\n",
|
"rent = tag_source(rent)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -122,7 +124,7 @@
|
||||||
" print(f\"\\n=== {label} ===\")\n",
|
" print(f\"\\n=== {label} ===\")\n",
|
||||||
" for row in counts.iter_rows():\n",
|
" for row in counts.iter_rows():\n",
|
||||||
" src, cnt = row\n",
|
" src, cnt = row\n",
|
||||||
" print(f\" {src}: {cnt:,} ({cnt/len(df)*100:.1f}%)\")\n",
|
" print(f\" {src}: {cnt:,} ({cnt / len(df) * 100:.1f}%)\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Known dedup count from scraper logs\n",
|
"# Known dedup count from scraper logs\n",
|
||||||
"CROSS_DEDUP_BUY = 2_220\n",
|
"CROSS_DEDUP_BUY = 2_220\n",
|
||||||
|
|
@ -132,7 +134,7 @@
|
||||||
"print(f\"Home.co.uk scraped (before dedup): {hk_buy_total:,}\")\n",
|
"print(f\"Home.co.uk scraped (before dedup): {hk_buy_total:,}\")\n",
|
||||||
"print(f\"Home.co.uk unique (after dedup): {hk_buy_unique:,}\")\n",
|
"print(f\"Home.co.uk unique (after dedup): {hk_buy_unique:,}\")\n",
|
||||||
"print(f\"Cross-source duplicates removed: {CROSS_DEDUP_BUY:,}\")\n",
|
"print(f\"Cross-source duplicates removed: {CROSS_DEDUP_BUY:,}\")\n",
|
||||||
"print(f\"Overlap rate: {CROSS_DEDUP_BUY/hk_buy_total*100:.1f}%\")"
|
"print(f\"Overlap rate: {CROSS_DEDUP_BUY / hk_buy_total * 100:.1f}%\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -987,23 +989,29 @@
|
||||||
"# Venn-style summary\n",
|
"# Venn-style summary\n",
|
||||||
"rm_buy = len(buy.filter(pl.col(\"source\") == \"Rightmove\"))\n",
|
"rm_buy = len(buy.filter(pl.col(\"source\") == \"Rightmove\"))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = go.Figure(go.Sankey(\n",
|
"fig = go.Figure(\n",
|
||||||
" node=dict(\n",
|
" go.Sankey(\n",
|
||||||
" label=[\n",
|
" node=dict(\n",
|
||||||
" f\"Rightmove\\n{rm_buy:,}\",\n",
|
" label=[\n",
|
||||||
" f\"Home.co.uk\\n{hk_buy_total:,} scraped\",\n",
|
" f\"Rightmove\\n{rm_buy:,}\",\n",
|
||||||
" f\"Merged BUY\\n{len(buy):,}\",\n",
|
" f\"Home.co.uk\\n{hk_buy_total:,} scraped\",\n",
|
||||||
" f\"Deduped\\n{CROSS_DEDUP_BUY:,}\",\n",
|
" f\"Merged BUY\\n{len(buy):,}\",\n",
|
||||||
" ],\n",
|
" f\"Deduped\\n{CROSS_DEDUP_BUY:,}\",\n",
|
||||||
" color=[\"#2563eb\", \"#10b981\", \"#6366f1\", \"#ef4444\"],\n",
|
" ],\n",
|
||||||
" ),\n",
|
" color=[\"#2563eb\", \"#10b981\", \"#6366f1\", \"#ef4444\"],\n",
|
||||||
" link=dict(\n",
|
" ),\n",
|
||||||
" source=[0, 1, 1],\n",
|
" link=dict(\n",
|
||||||
" target=[2, 2, 3],\n",
|
" source=[0, 1, 1],\n",
|
||||||
" value=[rm_buy, hk_buy_unique, CROSS_DEDUP_BUY],\n",
|
" target=[2, 2, 3],\n",
|
||||||
" color=[\"rgba(37,99,235,0.3)\", \"rgba(16,185,129,0.3)\", \"rgba(239,68,68,0.3)\"],\n",
|
" value=[rm_buy, hk_buy_unique, CROSS_DEDUP_BUY],\n",
|
||||||
" ),\n",
|
" color=[\n",
|
||||||
"))\n",
|
" \"rgba(37,99,235,0.3)\",\n",
|
||||||
|
" \"rgba(16,185,129,0.3)\",\n",
|
||||||
|
" \"rgba(239,68,68,0.3)\",\n",
|
||||||
|
" ],\n",
|
||||||
|
" ),\n",
|
||||||
|
" )\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(title=\"BUY Channel: Source Contribution Flow\", height=350)\n",
|
"fig.update_layout(title=\"BUY Channel: Source Contribution Flow\", height=350)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -1106,8 +1114,11 @@
|
||||||
"oc_comparison = (\n",
|
"oc_comparison = (\n",
|
||||||
" hk_by_oc.join(rm_by_oc, on=\"outcode\", how=\"left\")\n",
|
" hk_by_oc.join(rm_by_oc, on=\"outcode\", how=\"left\")\n",
|
||||||
" .with_columns(\n",
|
" .with_columns(\n",
|
||||||
" (pl.col(\"hk_count\") / (pl.col(\"hk_count\") + pl.col(\"rm_count\").fill_null(0)) * 100)\n",
|
" (\n",
|
||||||
" .alias(\"hk_pct_of_total\")\n",
|
" pl.col(\"hk_count\")\n",
|
||||||
|
" / (pl.col(\"hk_count\") + pl.col(\"rm_count\").fill_null(0))\n",
|
||||||
|
" * 100\n",
|
||||||
|
" ).alias(\"hk_pct_of_total\")\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" .sort(\"hk_count\", descending=True)\n",
|
" .sort(\"hk_count\", descending=True)\n",
|
||||||
")\n",
|
")\n",
|
||||||
|
|
@ -2215,18 +2226,28 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Bar chart: home.co.uk vs Rightmove counts per outcode\n",
|
"# Bar chart: home.co.uk vs Rightmove counts per outcode\n",
|
||||||
"fig = go.Figure()\n",
|
"fig = go.Figure()\n",
|
||||||
"fig.add_trace(go.Bar(\n",
|
"fig.add_trace(\n",
|
||||||
" x=oc_comparison[\"outcode\"], y=oc_comparison[\"rm_count\"],\n",
|
" go.Bar(\n",
|
||||||
" name=\"Rightmove\", marker_color=\"#2563eb\",\n",
|
" x=oc_comparison[\"outcode\"],\n",
|
||||||
"))\n",
|
" y=oc_comparison[\"rm_count\"],\n",
|
||||||
"fig.add_trace(go.Bar(\n",
|
" name=\"Rightmove\",\n",
|
||||||
" x=oc_comparison[\"outcode\"], y=oc_comparison[\"hk_count\"],\n",
|
" marker_color=\"#2563eb\",\n",
|
||||||
" name=\"Home.co.uk\", marker_color=\"#10b981\",\n",
|
" )\n",
|
||||||
"))\n",
|
")\n",
|
||||||
|
"fig.add_trace(\n",
|
||||||
|
" go.Bar(\n",
|
||||||
|
" x=oc_comparison[\"outcode\"],\n",
|
||||||
|
" y=oc_comparison[\"hk_count\"],\n",
|
||||||
|
" name=\"Home.co.uk\",\n",
|
||||||
|
" marker_color=\"#10b981\",\n",
|
||||||
|
" )\n",
|
||||||
|
")\n",
|
||||||
"fig.update_layout(\n",
|
"fig.update_layout(\n",
|
||||||
" barmode=\"group\", height=400,\n",
|
" barmode=\"group\",\n",
|
||||||
|
" height=400,\n",
|
||||||
" title=\"Listings per Outcode: Rightmove vs Home.co.uk (outcodes with HK coverage)\",\n",
|
" title=\"Listings per Outcode: Rightmove vs Home.co.uk (outcodes with HK coverage)\",\n",
|
||||||
" xaxis_title=\"Outcode\", yaxis_title=\"Listings\",\n",
|
" xaxis_title=\"Outcode\",\n",
|
||||||
|
" yaxis_title=\"Listings\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -3121,10 +3142,14 @@
|
||||||
"sample = covered.sample(n=min(30_000, len(covered)), seed=42)\n",
|
"sample = covered.sample(n=min(30_000, len(covered)), seed=42)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.scatter_map(\n",
|
"fig = px.scatter_map(\n",
|
||||||
" sample.to_pandas(), lat=\"lat\", lon=\"lon\",\n",
|
" sample.to_pandas(),\n",
|
||||||
|
" lat=\"lat\",\n",
|
||||||
|
" lon=\"lon\",\n",
|
||||||
" color=\"source\",\n",
|
" color=\"source\",\n",
|
||||||
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
||||||
" zoom=7, opacity=0.4, size_max=4,\n",
|
" zoom=7,\n",
|
||||||
|
" opacity=0.4,\n",
|
||||||
|
" size_max=4,\n",
|
||||||
" title=\"Listing Locations in Covered Outcodes (by source)\",\n",
|
" title=\"Listing Locations in Covered Outcodes (by source)\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_layout(height=600)\n",
|
"fig.update_layout(height=600)\n",
|
||||||
|
|
@ -3188,15 +3213,41 @@
|
||||||
"# For covered outcodes, compare home.co.uk listings against Rightmove\n",
|
"# For covered outcodes, compare home.co.uk listings against Rightmove\n",
|
||||||
"# to find near-matches (same postcode, same beds, price within 5%)\n",
|
"# to find near-matches (same postcode, same beds, price within 5%)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"hk = buy_oc.filter(pl.col(\"source\") == \"Home.co.uk\").select(\n",
|
"hk = (\n",
|
||||||
" \"Postcode\", \"Bedrooms\", \"Asking price\", \"Property type\", \"Address per Property Register\"\n",
|
" buy_oc.filter(pl.col(\"source\") == \"Home.co.uk\")\n",
|
||||||
").rename({\"Asking price\": \"hk_price\", \"Property type\": \"hk_type\", \"Address per Property Register\": \"hk_addr\"})\n",
|
" .select(\n",
|
||||||
|
" \"Postcode\",\n",
|
||||||
|
" \"Bedrooms\",\n",
|
||||||
|
" \"Asking price\",\n",
|
||||||
|
" \"Property type\",\n",
|
||||||
|
" \"Address per Property Register\",\n",
|
||||||
|
" )\n",
|
||||||
|
" .rename(\n",
|
||||||
|
" {\n",
|
||||||
|
" \"Asking price\": \"hk_price\",\n",
|
||||||
|
" \"Property type\": \"hk_type\",\n",
|
||||||
|
" \"Address per Property Register\": \"hk_addr\",\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"rm = buy_oc.filter(\n",
|
"rm = (\n",
|
||||||
" pl.col(\"source\") == \"Rightmove\"\n",
|
" buy_oc.filter(pl.col(\"source\") == \"Rightmove\")\n",
|
||||||
").select(\n",
|
" .select(\n",
|
||||||
" \"Postcode\", \"Bedrooms\", \"Asking price\", \"Property type\", \"Address per Property Register\"\n",
|
" \"Postcode\",\n",
|
||||||
").rename({\"Asking price\": \"rm_price\", \"Property type\": \"rm_type\", \"Address per Property Register\": \"rm_addr\"})\n",
|
" \"Bedrooms\",\n",
|
||||||
|
" \"Asking price\",\n",
|
||||||
|
" \"Property type\",\n",
|
||||||
|
" \"Address per Property Register\",\n",
|
||||||
|
" )\n",
|
||||||
|
" .rename(\n",
|
||||||
|
" {\n",
|
||||||
|
" \"Asking price\": \"rm_price\",\n",
|
||||||
|
" \"Property type\": \"rm_type\",\n",
|
||||||
|
" \"Address per Property Register\": \"rm_addr\",\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Join on postcode + bedrooms\n",
|
"# Join on postcode + bedrooms\n",
|
||||||
"joined = hk.join(rm, on=[\"Postcode\", \"Bedrooms\"], how=\"inner\")\n",
|
"joined = hk.join(rm, on=[\"Postcode\", \"Bedrooms\"], how=\"inner\")\n",
|
||||||
|
|
@ -3213,16 +3264,24 @@
|
||||||
"exact = joined.filter(pl.col(\"hk_price\") == pl.col(\"rm_price\"))\n",
|
"exact = joined.filter(pl.col(\"hk_price\") == pl.col(\"rm_price\"))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(f\"Home.co.uk listings (unique, in file): {len(hk):,}\")\n",
|
"print(f\"Home.co.uk listings (unique, in file): {len(hk):,}\")\n",
|
||||||
"print(f\"Rightmove listings in covered outcodes: {len(rm.filter(pl.col('Postcode').is_in(hk['Postcode']))):,}\")\n",
|
"print(\n",
|
||||||
|
" f\"Rightmove listings in covered outcodes: {len(rm.filter(pl.col('Postcode').is_in(hk['Postcode']))):,}\"\n",
|
||||||
|
")\n",
|
||||||
"print()\n",
|
"print()\n",
|
||||||
"print(f\"Joined on (postcode, bedrooms): {len(joined):,} candidate pairs\")\n",
|
"print(f\"Joined on (postcode, bedrooms): {len(joined):,} candidate pairs\")\n",
|
||||||
"print(f\" Exact price match: {len(exact):,} pairs (likely same property, different beds or already deduped)\")\n",
|
"print(\n",
|
||||||
"print(f\" Price within 5%: {len(near):,} pairs (probable duplicates with price rounding)\")\n",
|
" f\" Exact price match: {len(exact):,} pairs (likely same property, different beds or already deduped)\"\n",
|
||||||
|
")\n",
|
||||||
|
"print(\n",
|
||||||
|
" f\" Price within 5%: {len(near):,} pairs (probable duplicates with price rounding)\"\n",
|
||||||
|
")\n",
|
||||||
"print()\n",
|
"print()\n",
|
||||||
"# Unique hk listings that have at least one near-match\n",
|
"# Unique hk listings that have at least one near-match\n",
|
||||||
"hk_with_near = near.select(\"hk_price\", \"hk_addr\", \"Postcode\").unique()\n",
|
"hk_with_near = near.select(\"hk_price\", \"hk_addr\", \"Postcode\").unique()\n",
|
||||||
"print(f\"Home.co.uk listings with a near-match in RM: ~{len(hk_with_near):,}\")\n",
|
"print(f\"Home.co.uk listings with a near-match in RM: ~{len(hk_with_near):,}\")\n",
|
||||||
"print(f\"Estimated additional overlap: ~{len(hk_with_near)/len(hk)*100:.1f}% of unique HK listings\")"
|
"print(\n",
|
||||||
|
" f\"Estimated additional overlap: ~{len(hk_with_near) / len(hk) * 100:.1f}% of unique HK listings\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -4178,9 +4237,13 @@
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.histogram(\n",
|
"fig = px.histogram(\n",
|
||||||
" clipped.to_pandas(), x=\"Asking price\", color=\"source\", nbins=80,\n",
|
" clipped.to_pandas(),\n",
|
||||||
|
" x=\"Asking price\",\n",
|
||||||
|
" color=\"source\",\n",
|
||||||
|
" nbins=80,\n",
|
||||||
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
||||||
" barmode=\"overlay\", histnorm=\"probability density\",\n",
|
" barmode=\"overlay\",\n",
|
||||||
|
" histnorm=\"probability density\",\n",
|
||||||
" title=\"Price Distribution by Source (normalised, £50k–£2M)\",\n",
|
" title=\"Price Distribution by Source (normalised, £50k–£2M)\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_traces(opacity=0.6)\n",
|
"fig.update_traces(opacity=0.6)\n",
|
||||||
|
|
@ -5095,10 +5158,7 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Property type distribution by source\n",
|
"# Property type distribution by source\n",
|
||||||
"type_by_src = (\n",
|
"type_by_src = buy.group_by(\"source\", \"Property type\").agg(pl.len().alias(\"count\"))\n",
|
||||||
" buy.group_by(\"source\", \"Property type\")\n",
|
|
||||||
" .agg(pl.len().alias(\"count\"))\n",
|
|
||||||
")\n",
|
|
||||||
"# Normalise within each source\n",
|
"# Normalise within each source\n",
|
||||||
"totals = type_by_src.group_by(\"source\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
|
"totals = type_by_src.group_by(\"source\").agg(pl.col(\"count\").sum().alias(\"total\"))\n",
|
||||||
"type_by_src = type_by_src.join(totals, on=\"source\").with_columns(\n",
|
"type_by_src = type_by_src.join(totals, on=\"source\").with_columns(\n",
|
||||||
|
|
@ -5107,7 +5167,10 @@
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.bar(\n",
|
"fig = px.bar(\n",
|
||||||
" type_by_src.sort(\"Property type\").to_pandas(),\n",
|
" type_by_src.sort(\"Property type\").to_pandas(),\n",
|
||||||
" x=\"Property type\", y=\"pct\", color=\"source\", barmode=\"group\",\n",
|
" x=\"Property type\",\n",
|
||||||
|
" y=\"pct\",\n",
|
||||||
|
" color=\"source\",\n",
|
||||||
|
" barmode=\"group\",\n",
|
||||||
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
||||||
" title=\"Property Type Distribution by Source (%)\",\n",
|
" title=\"Property Type Distribution by Source (%)\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
|
|
@ -5186,7 +5249,9 @@
|
||||||
"# Property sub-type comparison — top home.co.uk sub-types\n",
|
"# Property sub-type comparison — top home.co.uk sub-types\n",
|
||||||
"hk_subtypes = (\n",
|
"hk_subtypes = (\n",
|
||||||
" buy.filter(pl.col(\"source\") == \"Home.co.uk\")[\"Property sub-type\"]\n",
|
" buy.filter(pl.col(\"source\") == \"Home.co.uk\")[\"Property sub-type\"]\n",
|
||||||
" .value_counts().sort(\"count\", descending=True).head(20)\n",
|
" .value_counts()\n",
|
||||||
|
" .sort(\"count\", descending=True)\n",
|
||||||
|
" .head(20)\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"Top 20 Home.co.uk property sub-types:\")\n",
|
"print(\"Top 20 Home.co.uk property sub-types:\")\n",
|
||||||
"hk_subtypes"
|
"hk_subtypes"
|
||||||
|
|
@ -5263,9 +5328,16 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Field completeness by source\n",
|
"# Field completeness by source\n",
|
||||||
"fields = [\n",
|
"fields = [\n",
|
||||||
" \"Bedrooms\", \"Bathrooms\", \"Postcode\", \"Address per Property Register\",\n",
|
" \"Bedrooms\",\n",
|
||||||
" \"Leasehold/Freehold\", \"Property type\", \"Total floor area (sqm)\",\n",
|
" \"Bathrooms\",\n",
|
||||||
" \"Listing date\", \"Asking price\", \"Price qualifier\",\n",
|
" \"Postcode\",\n",
|
||||||
|
" \"Address per Property Register\",\n",
|
||||||
|
" \"Leasehold/Freehold\",\n",
|
||||||
|
" \"Property type\",\n",
|
||||||
|
" \"Total floor area (sqm)\",\n",
|
||||||
|
" \"Listing date\",\n",
|
||||||
|
" \"Asking price\",\n",
|
||||||
|
" \"Price qualifier\",\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"rows = []\n",
|
"rows = []\n",
|
||||||
|
|
@ -5276,17 +5348,19 @@
|
||||||
" non_null = n - subset[f].null_count()\n",
|
" non_null = n - subset[f].null_count()\n",
|
||||||
" # Also count empty strings as missing for string fields\n",
|
" # Also count empty strings as missing for string fields\n",
|
||||||
" if subset[f].dtype == pl.Utf8:\n",
|
" if subset[f].dtype == pl.Utf8:\n",
|
||||||
" non_null = len(subset.filter(\n",
|
" non_null = len(\n",
|
||||||
" pl.col(f).is_not_null() & (pl.col(f).str.len_chars() > 0)\n",
|
" subset.filter(pl.col(f).is_not_null() & (pl.col(f).str.len_chars() > 0))\n",
|
||||||
" ))\n",
|
" )\n",
|
||||||
" rows.append({\"source\": src, \"field\": f, \"pct_available\": non_null / n * 100})\n",
|
" rows.append({\"source\": src, \"field\": f, \"pct_available\": non_null / n * 100})\n",
|
||||||
"\n",
|
"\n",
|
||||||
"completeness = pl.DataFrame(rows)\n",
|
"completeness = pl.DataFrame(rows)\n",
|
||||||
"pivot = completeness.pivot(on=\"source\", index=\"field\", values=\"pct_available\")\n",
|
"pivot = completeness.pivot(on=\"source\", index=\"field\", values=\"pct_available\")\n",
|
||||||
"pivot = pivot.with_columns([\n",
|
"pivot = pivot.with_columns(\n",
|
||||||
" pl.col(\"Rightmove\").round(1),\n",
|
" [\n",
|
||||||
" pl.col(\"Home.co.uk\").round(1),\n",
|
" pl.col(\"Rightmove\").round(1),\n",
|
||||||
"])\n",
|
" pl.col(\"Home.co.uk\").round(1),\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
"print(\"Field completeness (% non-null/non-empty):\")\n",
|
"print(\"Field completeness (% non-null/non-empty):\")\n",
|
||||||
"pivot"
|
"pivot"
|
||||||
]
|
]
|
||||||
|
|
@ -6198,19 +6272,26 @@
|
||||||
"# Bedroom distribution comparison\n",
|
"# Bedroom distribution comparison\n",
|
||||||
"fig = make_subplots(rows=1, cols=2, subplot_titles=(\"Rightmove\", \"Home.co.uk\"))\n",
|
"fig = make_subplots(rows=1, cols=2, subplot_titles=(\"Rightmove\", \"Home.co.uk\"))\n",
|
||||||
"for i, src in enumerate([\"Rightmove\", \"Home.co.uk\"], 1):\n",
|
"for i, src in enumerate([\"Rightmove\", \"Home.co.uk\"], 1):\n",
|
||||||
" beds = buy.filter(\n",
|
" beds = (\n",
|
||||||
" (pl.col(\"source\") == src) & (pl.col(\"Bedrooms\") <= 8)\n",
|
" buy.filter((pl.col(\"source\") == src) & (pl.col(\"Bedrooms\") <= 8))[\"Bedrooms\"]\n",
|
||||||
" )[\"Bedrooms\"].value_counts().sort(\"Bedrooms\")\n",
|
" .value_counts()\n",
|
||||||
|
" .sort(\"Bedrooms\")\n",
|
||||||
|
" )\n",
|
||||||
" # Normalise\n",
|
" # Normalise\n",
|
||||||
" total = beds[\"count\"].sum()\n",
|
" total = beds[\"count\"].sum()\n",
|
||||||
" fig.add_trace(\n",
|
" fig.add_trace(\n",
|
||||||
" go.Bar(\n",
|
" go.Bar(\n",
|
||||||
" x=beds[\"Bedrooms\"], y=beds[\"count\"] / total * 100,\n",
|
" x=beds[\"Bedrooms\"],\n",
|
||||||
|
" y=beds[\"count\"] / total * 100,\n",
|
||||||
" name=src,\n",
|
" name=src,\n",
|
||||||
" marker_color=\"#2563eb\" if src == \"Rightmove\" else \"#10b981\",\n",
|
" marker_color=\"#2563eb\" if src == \"Rightmove\" else \"#10b981\",\n",
|
||||||
" ), row=1, col=i,\n",
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=i,\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"fig.update_layout(height=350, title=\"Bedroom Distribution by Source (%)\", showlegend=False)\n",
|
"fig.update_layout(\n",
|
||||||
|
" height=350, title=\"Bedroom Distribution by Source (%)\", showlegend=False\n",
|
||||||
|
")\n",
|
||||||
"fig.update_yaxes(title_text=\"%\", row=1, col=1)\n",
|
"fig.update_yaxes(title_text=\"%\", row=1, col=1)\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
|
|
@ -6287,17 +6368,23 @@
|
||||||
"\n",
|
"\n",
|
||||||
"comparison_rows = []\n",
|
"comparison_rows = []\n",
|
||||||
"for ptype in [\"Detached\", \"Semi-Detached\", \"Terraced\", \"Flats/Maisonettes\", \"Other\"]:\n",
|
"for ptype in [\"Detached\", \"Semi-Detached\", \"Terraced\", \"Flats/Maisonettes\", \"Other\"]:\n",
|
||||||
" rm_p = rm_covered.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n",
|
" rm_p = rm_covered.filter(pl.col(\"Property type\") == ptype)[\n",
|
||||||
|
" \"Asking price\"\n",
|
||||||
|
" ].drop_nulls()\n",
|
||||||
" hk_p = hk_only.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n",
|
" hk_p = hk_only.filter(pl.col(\"Property type\") == ptype)[\"Asking price\"].drop_nulls()\n",
|
||||||
" if len(rm_p) > 0 and len(hk_p) > 0:\n",
|
" if len(rm_p) > 0 and len(hk_p) > 0:\n",
|
||||||
" comparison_rows.append({\n",
|
" comparison_rows.append(\n",
|
||||||
" \"Property type\": ptype,\n",
|
" {\n",
|
||||||
" \"RM count\": len(rm_p),\n",
|
" \"Property type\": ptype,\n",
|
||||||
" \"RM median £\": int(rm_p.median()),\n",
|
" \"RM count\": len(rm_p),\n",
|
||||||
" \"HK count\": len(hk_p),\n",
|
" \"RM median £\": int(rm_p.median()),\n",
|
||||||
" \"HK median £\": int(hk_p.median()),\n",
|
" \"HK count\": len(hk_p),\n",
|
||||||
" \"HK premium %\": round((hk_p.median() - rm_p.median()) / rm_p.median() * 100, 1),\n",
|
" \"HK median £\": int(hk_p.median()),\n",
|
||||||
" })\n",
|
" \"HK premium %\": round(\n",
|
||||||
|
" (hk_p.median() - rm_p.median()) / rm_p.median() * 100, 1\n",
|
||||||
|
" ),\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"comp = pl.DataFrame(comparison_rows)\n",
|
"comp = pl.DataFrame(comparison_rows)\n",
|
||||||
"print(\"Price comparison in covered outcodes (Home.co.uk unique listings vs Rightmove):\")\n",
|
"print(\"Price comparison in covered outcodes (Home.co.uk unique listings vs Rightmove):\")\n",
|
||||||
|
|
@ -7245,9 +7332,13 @@
|
||||||
"# Listing age histogram comparison\n",
|
"# Listing age histogram comparison\n",
|
||||||
"age_plot = with_age.filter(pl.col(\"days_on_market\") <= 730) # cap at 2 years\n",
|
"age_plot = with_age.filter(pl.col(\"days_on_market\") <= 730) # cap at 2 years\n",
|
||||||
"fig = px.histogram(\n",
|
"fig = px.histogram(\n",
|
||||||
" age_plot.to_pandas(), x=\"days_on_market\", color=\"source\", nbins=60,\n",
|
" age_plot.to_pandas(),\n",
|
||||||
|
" x=\"days_on_market\",\n",
|
||||||
|
" color=\"source\",\n",
|
||||||
|
" nbins=60,\n",
|
||||||
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
" color_discrete_map={\"Rightmove\": \"#2563eb\", \"Home.co.uk\": \"#10b981\"},\n",
|
||||||
" barmode=\"overlay\", histnorm=\"probability density\",\n",
|
" barmode=\"overlay\",\n",
|
||||||
|
" histnorm=\"probability density\",\n",
|
||||||
" title=\"Days on Market Distribution by Source (normalised, capped at 2 years)\",\n",
|
" title=\"Days on Market Distribution by Source (normalised, capped at 2 years)\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_traces(opacity=0.6)\n",
|
"fig.update_traces(opacity=0.6)\n",
|
||||||
|
|
@ -7330,7 +7421,9 @@
|
||||||
"print(f\" Projected home.co.uk total: ~{projected_hk:,}\")\n",
|
"print(f\" Projected home.co.uk total: ~{projected_hk:,}\")\n",
|
||||||
"print(f\" Projected cross-dedup: ~{projected_dedup:,}\")\n",
|
"print(f\" Projected cross-dedup: ~{projected_dedup:,}\")\n",
|
||||||
"print(f\" Projected unique additions: ~{projected_unique:,}\")\n",
|
"print(f\" Projected unique additions: ~{projected_unique:,}\")\n",
|
||||||
"print(f\" Projected merged dataset: ~{rm_buy + projected_unique:,} ({projected_unique/rm_buy*100:.1f}% increase)\")\n",
|
"print(\n",
|
||||||
|
" f\" Projected merged dataset: ~{rm_buy + projected_unique:,} ({projected_unique / rm_buy * 100:.1f}% increase)\"\n",
|
||||||
|
")\n",
|
||||||
"print()\n",
|
"print()\n",
|
||||||
"print(\"⚠️ These are rough estimates — the covered outcodes may not be representative\")"
|
"print(\"⚠️ These are rough estimates — the covered outcodes may not be representative\")"
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -54,11 +54,15 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"r5_bank = pl.read_parquet(\"../property-data/travel-times/transit/000000-bank-tube-station.parquet\")\n",
|
"r5_bank = pl.read_parquet(\n",
|
||||||
|
" \"../property-data/travel-times/transit/000000-bank-tube-station.parquet\"\n",
|
||||||
|
")\n",
|
||||||
"manual_bank = pl.read_parquet(\"../manual-data/journey_times_bank.parquet\")\n",
|
"manual_bank = pl.read_parquet(\"../manual-data/journey_times_bank.parquet\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(f\"R5 Bank: {r5_bank.shape[0]:,} postcodes\")\n",
|
"print(f\"R5 Bank: {r5_bank.shape[0]:,} postcodes\")\n",
|
||||||
"print(f\"Manual Bank: {manual_bank.shape[0]:,} postcodes ({manual_bank['public_transport_easy_minutes'].null_count():,} null easy)\")"
|
"print(\n",
|
||||||
|
" f\"Manual Bank: {manual_bank.shape[0]:,} postcodes ({manual_bank['public_transport_easy_minutes'].null_count():,} null easy)\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -116,25 +120,49 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# Join on postcode, keep only rows where both sources have values\n",
|
"# Join on postcode, keep only rows where both sources have values\n",
|
||||||
"bank = (\n",
|
"bank = (\n",
|
||||||
" r5_bank\n",
|
" r5_bank.join(manual_bank, left_on=\"pcds\", right_on=\"postcode\", how=\"inner\")\n",
|
||||||
" .join(manual_bank, left_on=\"pcds\", right_on=\"postcode\", how=\"inner\")\n",
|
|
||||||
" .filter(\n",
|
" .filter(\n",
|
||||||
" pl.col(\"public_transport_easy_minutes\").is_not_null()\n",
|
" pl.col(\"public_transport_easy_minutes\").is_not_null()\n",
|
||||||
" & pl.col(\"public_transport_quick_minutes\").is_not_null()\n",
|
" & pl.col(\"public_transport_quick_minutes\").is_not_null()\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" .with_columns([\n",
|
" .with_columns(\n",
|
||||||
" # Signed error: R5 - Manual (positive = R5 is slower)\n",
|
" [\n",
|
||||||
" (pl.col(\"travel_minutes\").cast(pl.Float64) - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)).alias(\"error_easy\"),\n",
|
" # Signed error: R5 - Manual (positive = R5 is slower)\n",
|
||||||
" (pl.col(\"best_minutes\").cast(pl.Float64) - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)).alias(\"error_quick\"),\n",
|
" (\n",
|
||||||
" # Absolute error\n",
|
" pl.col(\"travel_minutes\").cast(pl.Float64)\n",
|
||||||
" (pl.col(\"travel_minutes\").cast(pl.Float64) - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)).abs().alias(\"abs_error_easy\"),\n",
|
" - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)\n",
|
||||||
" (pl.col(\"best_minutes\").cast(pl.Float64) - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)).abs().alias(\"abs_error_quick\"),\n",
|
" ).alias(\"error_easy\"),\n",
|
||||||
" ])\n",
|
" (\n",
|
||||||
|
" pl.col(\"best_minutes\").cast(pl.Float64)\n",
|
||||||
|
" - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)\n",
|
||||||
|
" ).alias(\"error_quick\"),\n",
|
||||||
|
" # Absolute error\n",
|
||||||
|
" (\n",
|
||||||
|
" pl.col(\"travel_minutes\").cast(pl.Float64)\n",
|
||||||
|
" - pl.col(\"public_transport_easy_minutes\").cast(pl.Float64)\n",
|
||||||
|
" )\n",
|
||||||
|
" .abs()\n",
|
||||||
|
" .alias(\"abs_error_easy\"),\n",
|
||||||
|
" (\n",
|
||||||
|
" pl.col(\"best_minutes\").cast(pl.Float64)\n",
|
||||||
|
" - pl.col(\"public_transport_quick_minutes\").cast(pl.Float64)\n",
|
||||||
|
" )\n",
|
||||||
|
" .abs()\n",
|
||||||
|
" .alias(\"abs_error_quick\"),\n",
|
||||||
|
" ]\n",
|
||||||
|
" )\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(f\"Joined (non-null): {bank.shape[0]:,} postcodes\")\n",
|
"print(f\"Joined (non-null): {bank.shape[0]:,} postcodes\")\n",
|
||||||
"bank.select(\"pcds\", \"travel_minutes\", \"public_transport_easy_minutes\", \"error_easy\",\n",
|
"bank.select(\n",
|
||||||
" \"best_minutes\", \"public_transport_quick_minutes\", \"error_quick\").head(10)"
|
" \"pcds\",\n",
|
||||||
|
" \"travel_minutes\",\n",
|
||||||
|
" \"public_transport_easy_minutes\",\n",
|
||||||
|
" \"error_easy\",\n",
|
||||||
|
" \"best_minutes\",\n",
|
||||||
|
" \"public_transport_quick_minutes\",\n",
|
||||||
|
" \"error_quick\",\n",
|
||||||
|
").head(10)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -196,18 +224,23 @@
|
||||||
" percentiles = [5, 25, 50, 80, 90, 95, 99]\n",
|
" percentiles = [5, 25, 50, 80, 90, 95, 99]\n",
|
||||||
" rows = []\n",
|
" rows = []\n",
|
||||||
" for p in percentiles:\n",
|
" for p in percentiles:\n",
|
||||||
" rows.append({\n",
|
" rows.append(\n",
|
||||||
" \"percentile\": f\"p{p}\",\n",
|
" {\n",
|
||||||
" f\"{label} signed error\": round(float(np.percentile(col, p)), 1),\n",
|
" \"percentile\": f\"p{p}\",\n",
|
||||||
" f\"{label} absolute error\": round(float(np.percentile(abs_col, p)), 1),\n",
|
" f\"{label} signed error\": round(float(np.percentile(col, p)), 1),\n",
|
||||||
" })\n",
|
" f\"{label} absolute error\": round(float(np.percentile(abs_col, p)), 1),\n",
|
||||||
" rows.append({\n",
|
" }\n",
|
||||||
" \"percentile\": \"mean\",\n",
|
" )\n",
|
||||||
" f\"{label} signed error\": round(float(np.mean(col)), 1),\n",
|
" rows.append(\n",
|
||||||
" f\"{label} absolute error\": round(float(np.mean(abs_col)), 1),\n",
|
" {\n",
|
||||||
" })\n",
|
" \"percentile\": \"mean\",\n",
|
||||||
|
" f\"{label} signed error\": round(float(np.mean(col)), 1),\n",
|
||||||
|
" f\"{label} absolute error\": round(float(np.mean(abs_col)), 1),\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
" return pl.DataFrame(rows)\n",
|
" return pl.DataFrame(rows)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"stats_easy = percentile_stats(\"error_easy\", \"Median (easy)\")\n",
|
"stats_easy = percentile_stats(\"error_easy\", \"Median (easy)\")\n",
|
||||||
"stats_quick = percentile_stats(\"error_quick\", \"Best (quick)\")\n",
|
"stats_quick = percentile_stats(\"error_quick\", \"Best (quick)\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -1120,24 +1153,42 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n",
|
"fig = make_subplots(\n",
|
||||||
" \"Median transit time error (R5 − TfL)\",\n",
|
" rows=1,\n",
|
||||||
" \"Best transit time error (R5 − TfL)\"\n",
|
" cols=2,\n",
|
||||||
"])\n",
|
" subplot_titles=[\n",
|
||||||
|
" \"Median transit time error (R5 − TfL)\",\n",
|
||||||
|
" \"Best transit time error (R5 − TfL)\",\n",
|
||||||
|
" ],\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Clip for readability\n",
|
"# Clip for readability\n",
|
||||||
"easy_clipped = bank[\"error_easy\"].clip(-60, 60).to_numpy()\n",
|
"easy_clipped = bank[\"error_easy\"].clip(-60, 60).to_numpy()\n",
|
||||||
"quick_clipped = bank[\"error_quick\"].clip(-60, 60).to_numpy()\n",
|
"quick_clipped = bank[\"error_quick\"].clip(-60, 60).to_numpy()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig.add_trace(go.Histogram(x=easy_clipped, nbinsx=120, name=\"Median (easy)\",\n",
|
"fig.add_trace(\n",
|
||||||
" marker_color=\"#0d9488\"), row=1, col=1)\n",
|
" go.Histogram(\n",
|
||||||
"fig.add_trace(go.Histogram(x=quick_clipped, nbinsx=120, name=\"Best (quick)\",\n",
|
" x=easy_clipped, nbinsx=120, name=\"Median (easy)\", marker_color=\"#0d9488\"\n",
|
||||||
" marker_color=\"#f59e0b\"), row=1, col=2)\n",
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=1,\n",
|
||||||
|
")\n",
|
||||||
|
"fig.add_trace(\n",
|
||||||
|
" go.Histogram(\n",
|
||||||
|
" x=quick_clipped, nbinsx=120, name=\"Best (quick)\", marker_color=\"#f59e0b\"\n",
|
||||||
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=2,\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=1)\n",
|
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=1)\n",
|
||||||
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=2)\n",
|
"fig.update_xaxes(title_text=\"Error (minutes)\", row=1, col=2)\n",
|
||||||
"fig.update_yaxes(title_text=\"Count\", row=1, col=1)\n",
|
"fig.update_yaxes(title_text=\"Count\", row=1, col=1)\n",
|
||||||
"fig.update_layout(height=400, showlegend=False, title_text=\"Bank: Error Distribution (clipped ±60 min)\")\n",
|
"fig.update_layout(\n",
|
||||||
|
" height=400,\n",
|
||||||
|
" showlegend=False,\n",
|
||||||
|
" title_text=\"Bank: Error Distribution (clipped ±60 min)\",\n",
|
||||||
|
")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -2104,34 +2155,55 @@
|
||||||
"# Sample for scatter plot performance\n",
|
"# Sample for scatter plot performance\n",
|
||||||
"sample = bank.sample(n=min(20_000, bank.shape[0]), seed=42)\n",
|
"sample = bank.sample(n=min(20_000, bank.shape[0]), seed=42)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n",
|
"fig = make_subplots(\n",
|
||||||
" \"Median: R5 vs TfL (easy)\",\n",
|
" rows=1,\n",
|
||||||
" \"Best: R5 vs TfL (quick)\"\n",
|
" cols=2,\n",
|
||||||
"])\n",
|
" subplot_titles=[\"Median: R5 vs TfL (easy)\", \"Best: R5 vs TfL (quick)\"],\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig.add_trace(go.Scattergl(\n",
|
"fig.add_trace(\n",
|
||||||
" x=sample[\"public_transport_easy_minutes\"].to_numpy(),\n",
|
" go.Scattergl(\n",
|
||||||
" y=sample[\"travel_minutes\"].cast(pl.Float64).to_numpy(),\n",
|
" x=sample[\"public_transport_easy_minutes\"].to_numpy(),\n",
|
||||||
" mode=\"markers\", marker=dict(size=2, opacity=0.3, color=\"#0d9488\"),\n",
|
" y=sample[\"travel_minutes\"].cast(pl.Float64).to_numpy(),\n",
|
||||||
" name=\"Median\"\n",
|
" mode=\"markers\",\n",
|
||||||
"), row=1, col=1)\n",
|
" marker=dict(size=2, opacity=0.3, color=\"#0d9488\"),\n",
|
||||||
|
" name=\"Median\",\n",
|
||||||
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=1,\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig.add_trace(go.Scattergl(\n",
|
"fig.add_trace(\n",
|
||||||
" x=sample[\"public_transport_quick_minutes\"].to_numpy(),\n",
|
" go.Scattergl(\n",
|
||||||
" y=sample[\"best_minutes\"].cast(pl.Float64).to_numpy(),\n",
|
" x=sample[\"public_transport_quick_minutes\"].to_numpy(),\n",
|
||||||
" mode=\"markers\", marker=dict(size=2, opacity=0.3, color=\"#f59e0b\"),\n",
|
" y=sample[\"best_minutes\"].cast(pl.Float64).to_numpy(),\n",
|
||||||
" name=\"Best\"\n",
|
" mode=\"markers\",\n",
|
||||||
"), row=1, col=2)\n",
|
" marker=dict(size=2, opacity=0.3, color=\"#f59e0b\"),\n",
|
||||||
|
" name=\"Best\",\n",
|
||||||
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=2,\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Perfect agreement line\n",
|
"# Perfect agreement line\n",
|
||||||
"for col in [1, 2]:\n",
|
"for col in [1, 2]:\n",
|
||||||
" fig.add_trace(go.Scatter(x=[0, 200], y=[0, 200], mode=\"lines\",\n",
|
" fig.add_trace(\n",
|
||||||
" line=dict(color=\"red\", dash=\"dash\", width=1),\n",
|
" go.Scatter(\n",
|
||||||
" showlegend=False), row=1, col=col)\n",
|
" x=[0, 200],\n",
|
||||||
|
" y=[0, 200],\n",
|
||||||
|
" mode=\"lines\",\n",
|
||||||
|
" line=dict(color=\"red\", dash=\"dash\", width=1),\n",
|
||||||
|
" showlegend=False,\n",
|
||||||
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=col,\n",
|
||||||
|
" )\n",
|
||||||
" fig.update_xaxes(title_text=\"TfL API (minutes)\", row=1, col=col)\n",
|
" fig.update_xaxes(title_text=\"TfL API (minutes)\", row=1, col=col)\n",
|
||||||
" fig.update_yaxes(title_text=\"R5 (minutes)\", row=1, col=col)\n",
|
" fig.update_yaxes(title_text=\"R5 (minutes)\", row=1, col=col)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig.update_layout(height=500, showlegend=False, title_text=\"Bank: R5 vs TfL API (20k sample)\")\n",
|
"fig.update_layout(\n",
|
||||||
|
" height=500, showlegend=False, title_text=\"Bank: R5 vs TfL API (20k sample)\"\n",
|
||||||
|
")\n",
|
||||||
"fig.show()"
|
"fig.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -403063,7 +403135,8 @@
|
||||||
"\n",
|
"\n",
|
||||||
"fig = px.scatter_map(\n",
|
"fig = px.scatter_map(\n",
|
||||||
" map_sample.to_pandas(),\n",
|
" map_sample.to_pandas(),\n",
|
||||||
" lat=\"lat\", lon=\"long\",\n",
|
" lat=\"lat\",\n",
|
||||||
|
" lon=\"long\",\n",
|
||||||
" color=\"error_easy\",\n",
|
" color=\"error_easy\",\n",
|
||||||
" color_continuous_scale=\"RdBu_r\", # red=positive (R5 slower), blue=negative (R5 faster)\n",
|
" color_continuous_scale=\"RdBu_r\", # red=positive (R5 slower), blue=negative (R5 faster)\n",
|
||||||
" range_color=[-30, 30],\n",
|
" range_color=[-30, 30],\n",
|
||||||
|
|
@ -403071,8 +403144,14 @@
|
||||||
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
|
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
|
||||||
" opacity=0.5,\n",
|
" opacity=0.5,\n",
|
||||||
" title=\"Bank — Median transit error (R5 − TfL easy), minutes\",\n",
|
" title=\"Bank — Median transit error (R5 − TfL easy), minutes\",\n",
|
||||||
" hover_data={\"pcds\": True, \"travel_minutes\": True, \"public_transport_easy_minutes\": True,\n",
|
" hover_data={\n",
|
||||||
" \"error_easy\": \":.0f\", \"lat\": False, \"long\": False},\n",
|
" \"pcds\": True,\n",
|
||||||
|
" \"travel_minutes\": True,\n",
|
||||||
|
" \"public_transport_easy_minutes\": True,\n",
|
||||||
|
" \"error_easy\": \":.0f\",\n",
|
||||||
|
" \"lat\": False,\n",
|
||||||
|
" \"long\": False,\n",
|
||||||
|
" },\n",
|
||||||
" height=700,\n",
|
" height=700,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_layout(map_style=\"carto-positron\")\n",
|
"fig.update_layout(map_style=\"carto-positron\")\n",
|
||||||
|
|
@ -803994,7 +804073,8 @@
|
||||||
"source": [
|
"source": [
|
||||||
"fig = px.scatter_map(\n",
|
"fig = px.scatter_map(\n",
|
||||||
" map_sample.to_pandas(),\n",
|
" map_sample.to_pandas(),\n",
|
||||||
" lat=\"lat\", lon=\"long\",\n",
|
" lat=\"lat\",\n",
|
||||||
|
" lon=\"long\",\n",
|
||||||
" color=\"error_quick\",\n",
|
" color=\"error_quick\",\n",
|
||||||
" color_continuous_scale=\"RdBu_r\",\n",
|
" color_continuous_scale=\"RdBu_r\",\n",
|
||||||
" range_color=[-30, 30],\n",
|
" range_color=[-30, 30],\n",
|
||||||
|
|
@ -804002,8 +804082,14 @@
|
||||||
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
|
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
|
||||||
" opacity=0.5,\n",
|
" opacity=0.5,\n",
|
||||||
" title=\"Bank — Best transit error (R5 − TfL quick), minutes\",\n",
|
" title=\"Bank — Best transit error (R5 − TfL quick), minutes\",\n",
|
||||||
" hover_data={\"pcds\": True, \"best_minutes\": True, \"public_transport_quick_minutes\": True,\n",
|
" hover_data={\n",
|
||||||
" \"error_quick\": \":.0f\", \"lat\": False, \"long\": False},\n",
|
" \"pcds\": True,\n",
|
||||||
|
" \"best_minutes\": True,\n",
|
||||||
|
" \"public_transport_quick_minutes\": True,\n",
|
||||||
|
" \"error_quick\": \":.0f\",\n",
|
||||||
|
" \"lat\": False,\n",
|
||||||
|
" \"long\": False,\n",
|
||||||
|
" },\n",
|
||||||
" height=700,\n",
|
" height=700,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_layout(map_style=\"carto-positron\")\n",
|
"fig.update_layout(map_style=\"carto-positron\")\n",
|
||||||
|
|
@ -1204925,7 +1205011,8 @@
|
||||||
"source": [
|
"source": [
|
||||||
"fig = px.scatter_map(\n",
|
"fig = px.scatter_map(\n",
|
||||||
" map_sample.to_pandas(),\n",
|
" map_sample.to_pandas(),\n",
|
||||||
" lat=\"lat\", lon=\"long\",\n",
|
" lat=\"lat\",\n",
|
||||||
|
" lon=\"long\",\n",
|
||||||
" color=\"abs_error_easy\",\n",
|
" color=\"abs_error_easy\",\n",
|
||||||
" color_continuous_scale=\"YlOrRd\",\n",
|
" color_continuous_scale=\"YlOrRd\",\n",
|
||||||
" range_color=[0, 30],\n",
|
" range_color=[0, 30],\n",
|
||||||
|
|
@ -1204933,8 +1205020,14 @@
|
||||||
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
|
" center={\"lat\": 51.5, \"lon\": -0.1},\n",
|
||||||
" opacity=0.5,\n",
|
" opacity=0.5,\n",
|
||||||
" title=\"Bank — Absolute median transit error |R5 − TfL easy|, minutes\",\n",
|
" title=\"Bank — Absolute median transit error |R5 − TfL easy|, minutes\",\n",
|
||||||
" hover_data={\"pcds\": True, \"travel_minutes\": True, \"public_transport_easy_minutes\": True,\n",
|
" hover_data={\n",
|
||||||
" \"abs_error_easy\": \":.0f\", \"lat\": False, \"long\": False},\n",
|
" \"pcds\": True,\n",
|
||||||
|
" \"travel_minutes\": True,\n",
|
||||||
|
" \"public_transport_easy_minutes\": True,\n",
|
||||||
|
" \"abs_error_easy\": \":.0f\",\n",
|
||||||
|
" \"lat\": False,\n",
|
||||||
|
" \"long\": False,\n",
|
||||||
|
" },\n",
|
||||||
" height=700,\n",
|
" height=700,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"fig.update_layout(map_style=\"carto-positron\")\n",
|
"fig.update_layout(map_style=\"carto-positron\")\n",
|
||||||
|
|
@ -1204998,9 +1205091,15 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"bank.sort(\"abs_error_easy\", descending=True).select(\n",
|
"bank.sort(\"abs_error_easy\", descending=True).select(\n",
|
||||||
" \"pcds\", \"lat\", \"long\",\n",
|
" \"pcds\",\n",
|
||||||
" \"travel_minutes\", \"public_transport_easy_minutes\", \"error_easy\",\n",
|
" \"lat\",\n",
|
||||||
" \"best_minutes\", \"public_transport_quick_minutes\", \"error_quick\",\n",
|
" \"long\",\n",
|
||||||
|
" \"travel_minutes\",\n",
|
||||||
|
" \"public_transport_easy_minutes\",\n",
|
||||||
|
" \"error_easy\",\n",
|
||||||
|
" \"best_minutes\",\n",
|
||||||
|
" \"public_transport_quick_minutes\",\n",
|
||||||
|
" \"error_quick\",\n",
|
||||||
").head(30)"
|
").head(30)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -1205945,45 +1206044,75 @@
|
||||||
"\n",
|
"\n",
|
||||||
"dist_df = bank.with_columns(\n",
|
"dist_df = bank.with_columns(\n",
|
||||||
" # Rough km distance using Haversine approximation\n",
|
" # Rough km distance using Haversine approximation\n",
|
||||||
" ((((pl.col(\"lat\") - BANK_LAT) * 111.32) ** 2 +\n",
|
" (\n",
|
||||||
" ((pl.col(\"long\") - BANK_LON) * 111.32 * np.cos(np.radians(BANK_LAT))) ** 2) ** 0.5\n",
|
" (\n",
|
||||||
|
" ((pl.col(\"lat\") - BANK_LAT) * 111.32) ** 2\n",
|
||||||
|
" + ((pl.col(\"long\") - BANK_LON) * 111.32 * np.cos(np.radians(BANK_LAT))) ** 2\n",
|
||||||
|
" )\n",
|
||||||
|
" ** 0.5\n",
|
||||||
" ).alias(\"dist_km\")\n",
|
" ).alias(\"dist_km\")\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Bin by 5km\n",
|
"# Bin by 5km\n",
|
||||||
"binned = (\n",
|
"binned = (\n",
|
||||||
" dist_df\n",
|
" dist_df.with_columns((pl.col(\"dist_km\") / 5).floor() * 5)\n",
|
||||||
" .with_columns((pl.col(\"dist_km\") / 5).floor() * 5)\n",
|
|
||||||
" .group_by(\"dist_km\")\n",
|
" .group_by(\"dist_km\")\n",
|
||||||
" .agg([\n",
|
" .agg(\n",
|
||||||
" pl.col(\"error_easy\").median().alias(\"median_error_easy\"),\n",
|
" [\n",
|
||||||
" pl.col(\"error_quick\").median().alias(\"median_error_quick\"),\n",
|
" pl.col(\"error_easy\").median().alias(\"median_error_easy\"),\n",
|
||||||
" pl.col(\"abs_error_easy\").median().alias(\"median_abs_error_easy\"),\n",
|
" pl.col(\"error_quick\").median().alias(\"median_error_quick\"),\n",
|
||||||
" pl.len().alias(\"count\"),\n",
|
" pl.col(\"abs_error_easy\").median().alias(\"median_abs_error_easy\"),\n",
|
||||||
" ])\n",
|
" pl.len().alias(\"count\"),\n",
|
||||||
|
" ]\n",
|
||||||
|
" )\n",
|
||||||
" .sort(\"dist_km\")\n",
|
" .sort(\"dist_km\")\n",
|
||||||
" .filter(pl.col(\"count\") > 50)\n",
|
" .filter(pl.col(\"count\") > 50)\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig = make_subplots(rows=1, cols=2, subplot_titles=[\n",
|
"fig = make_subplots(\n",
|
||||||
" \"Median signed error by distance\",\n",
|
" rows=1,\n",
|
||||||
" \"Median absolute error by distance\"\n",
|
" cols=2,\n",
|
||||||
"])\n",
|
" subplot_titles=[\n",
|
||||||
|
" \"Median signed error by distance\",\n",
|
||||||
|
" \"Median absolute error by distance\",\n",
|
||||||
|
" ],\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig.add_trace(go.Scatter(\n",
|
"fig.add_trace(\n",
|
||||||
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_error_easy\"].to_numpy(),\n",
|
" go.Scatter(\n",
|
||||||
" mode=\"lines+markers\", name=\"Easy\", line=dict(color=\"#0d9488\")\n",
|
" x=binned[\"dist_km\"].to_numpy(),\n",
|
||||||
"), row=1, col=1)\n",
|
" y=binned[\"median_error_easy\"].to_numpy(),\n",
|
||||||
"fig.add_trace(go.Scatter(\n",
|
" mode=\"lines+markers\",\n",
|
||||||
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_error_quick\"].to_numpy(),\n",
|
" name=\"Easy\",\n",
|
||||||
" mode=\"lines+markers\", name=\"Quick\", line=dict(color=\"#f59e0b\")\n",
|
" line=dict(color=\"#0d9488\"),\n",
|
||||||
"), row=1, col=1)\n",
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=1,\n",
|
||||||
|
")\n",
|
||||||
|
"fig.add_trace(\n",
|
||||||
|
" go.Scatter(\n",
|
||||||
|
" x=binned[\"dist_km\"].to_numpy(),\n",
|
||||||
|
" y=binned[\"median_error_quick\"].to_numpy(),\n",
|
||||||
|
" mode=\"lines+markers\",\n",
|
||||||
|
" name=\"Quick\",\n",
|
||||||
|
" line=dict(color=\"#f59e0b\"),\n",
|
||||||
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=1,\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig.add_trace(go.Scatter(\n",
|
"fig.add_trace(\n",
|
||||||
" x=binned[\"dist_km\"].to_numpy(), y=binned[\"median_abs_error_easy\"].to_numpy(),\n",
|
" go.Scatter(\n",
|
||||||
" mode=\"lines+markers\", name=\"|Easy|\", line=dict(color=\"#0d9488\"),\n",
|
" x=binned[\"dist_km\"].to_numpy(),\n",
|
||||||
" showlegend=False\n",
|
" y=binned[\"median_abs_error_easy\"].to_numpy(),\n",
|
||||||
"), row=1, col=2)\n",
|
" mode=\"lines+markers\",\n",
|
||||||
|
" name=\"|Easy|\",\n",
|
||||||
|
" line=dict(color=\"#0d9488\"),\n",
|
||||||
|
" showlegend=False,\n",
|
||||||
|
" ),\n",
|
||||||
|
" row=1,\n",
|
||||||
|
" col=2,\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for col in [1, 2]:\n",
|
"for col in [1, 2]:\n",
|
||||||
" fig.update_xaxes(title_text=\"Distance from Bank (km)\", row=1, col=col)\n",
|
" fig.update_xaxes(title_text=\"Distance from Bank (km)\", row=1, col=col)\n",
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,21 @@ SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", "3"))
|
||||||
# Whether to run a scrape immediately on startup
|
# Whether to run a scrape immediately on startup
|
||||||
RUN_ON_STARTUP = os.environ.get("RUN_ON_STARTUP", "").lower() in ("1", "true", "yes")
|
RUN_ON_STARTUP = os.environ.get("RUN_ON_STARTUP", "").lower() in ("1", "true", "yes")
|
||||||
# Enable/disable individual sources
|
# Enable/disable individual sources
|
||||||
SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in ("1", "true", "yes")
|
SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in (
|
||||||
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in ("1", "true", "yes")
|
"1",
|
||||||
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in ("1", "true", "yes")
|
"true",
|
||||||
|
"yes",
|
||||||
|
)
|
||||||
|
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in (
|
||||||
|
"1",
|
||||||
|
"true",
|
||||||
|
"yes",
|
||||||
|
)
|
||||||
|
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in (
|
||||||
|
"1",
|
||||||
|
"true",
|
||||||
|
"yes",
|
||||||
|
)
|
||||||
|
|
||||||
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
|
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
|
||||||
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
|
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,8 @@ def solve_cloudflare() -> tuple[dict[str, str], str] | None:
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"Cloudflare solved — got %d cookies, UA: %s",
|
"Cloudflare solved — got %d cookies, UA: %s",
|
||||||
len(cookies), user_agent[:60],
|
len(cookies),
|
||||||
|
user_agent[:60],
|
||||||
)
|
)
|
||||||
flaresolverr_attempts_total.labels(result="success").inc()
|
flaresolverr_attempts_total.labels(result="success").inc()
|
||||||
return cookies, user_agent
|
return cookies, user_agent
|
||||||
|
|
@ -129,11 +130,13 @@ def make_client(cookies: dict[str, str], user_agent: str) -> Session:
|
||||||
Uses Chrome TLS impersonation so cf_clearance cookies (which are bound
|
Uses Chrome TLS impersonation so cf_clearance cookies (which are bound
|
||||||
to Chrome's JA3 fingerprint from FlareSolverr) remain valid."""
|
to Chrome's JA3 fingerprint from FlareSolverr) remain valid."""
|
||||||
session = Session(impersonate="chrome")
|
session = Session(impersonate="chrome")
|
||||||
session.headers.update({
|
session.headers.update(
|
||||||
"User-Agent": user_agent,
|
{
|
||||||
"Accept": "application/json, text/plain, */*",
|
"User-Agent": user_agent,
|
||||||
"x-requested-with": "XMLHttpRequest",
|
"Accept": "application/json, text/plain, */*",
|
||||||
})
|
"x-requested-with": "XMLHttpRequest",
|
||||||
|
}
|
||||||
|
)
|
||||||
# Laravel CSRF: the XSRF-TOKEN cookie value must also be sent as the
|
# Laravel CSRF: the XSRF-TOKEN cookie value must also be sent as the
|
||||||
# X-XSRF-TOKEN request header (URL-decoded). Without this header, the
|
# X-XSRF-TOKEN request header (URL-decoded). Without this header, the
|
||||||
# server rejects every request with 419/403.
|
# server rejects every request with 419/403.
|
||||||
|
|
@ -165,7 +168,11 @@ def fetch_page(
|
||||||
return resp.json()
|
return resp.json()
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
homecouk_errors_total.labels(type="json_decode").inc()
|
homecouk_errors_total.labels(type="json_decode").inc()
|
||||||
log.error("Non-JSON response from %s (got %s)", url, resp.headers.get("content-type", "?"))
|
log.error(
|
||||||
|
"Non-JSON response from %s (got %s)",
|
||||||
|
url,
|
||||||
|
resp.headers.get("content-type", "?"),
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
if resp.status_code == 403:
|
if resp.status_code == 403:
|
||||||
raise CookiesExpiredError("HTTP 403 — cookies likely expired")
|
raise CookiesExpiredError("HTTP 403 — cookies likely expired")
|
||||||
|
|
@ -173,7 +180,11 @@ def fetch_page(
|
||||||
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
||||||
log.warning(
|
log.warning(
|
||||||
"HTTP %d from %s, retry %d/%d in %.1fs",
|
"HTTP %d from %s, retry %d/%d in %.1fs",
|
||||||
resp.status_code, url, attempt + 1, max_retries, delay,
|
resp.status_code,
|
||||||
|
url,
|
||||||
|
attempt + 1,
|
||||||
|
max_retries,
|
||||||
|
delay,
|
||||||
)
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
continue
|
continue
|
||||||
|
|
@ -186,7 +197,11 @@ def fetch_page(
|
||||||
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
||||||
log.warning(
|
log.warning(
|
||||||
"%s from %s, retry %d/%d in %.1fs",
|
"%s from %s, retry %d/%d in %.1fs",
|
||||||
type(e).__name__, url, attempt + 1, max_retries, delay,
|
type(e).__name__,
|
||||||
|
url,
|
||||||
|
attempt + 1,
|
||||||
|
max_retries,
|
||||||
|
delay,
|
||||||
)
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
homecouk_errors_total.labels(type="retry_exhausted").inc()
|
homecouk_errors_total.labels(type="retry_exhausted").inc()
|
||||||
|
|
@ -218,7 +233,12 @@ def map_property_type(raw_type: str | None) -> str:
|
||||||
# Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc.
|
# Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc.
|
||||||
# Try common patterns
|
# Try common patterns
|
||||||
lower = raw_type.lower()
|
lower = raw_type.lower()
|
||||||
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
|
if (
|
||||||
|
"flat" in lower
|
||||||
|
or "apartment" in lower
|
||||||
|
or "maisonette" in lower
|
||||||
|
or "studio" in lower
|
||||||
|
):
|
||||||
return "Flats/Maisonettes"
|
return "Flats/Maisonettes"
|
||||||
if "detached" in lower and "semi" not in lower:
|
if "detached" in lower and "semi" not in lower:
|
||||||
return "Detached"
|
return "Detached"
|
||||||
|
|
@ -231,7 +251,9 @@ def map_property_type(raw_type: str | None) -> str:
|
||||||
|
|
||||||
|
|
||||||
def transform_property(
|
def transform_property(
|
||||||
prop: dict, channel: str, pc_index: PostcodeSpatialIndex,
|
prop: dict,
|
||||||
|
channel: str,
|
||||||
|
pc_index: PostcodeSpatialIndex,
|
||||||
) -> dict | None:
|
) -> dict | None:
|
||||||
"""Transform a raw home.co.uk property dict into our output schema."""
|
"""Transform a raw home.co.uk property dict into our output schema."""
|
||||||
lat = prop.get("latitude")
|
lat = prop.get("latitude")
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,9 @@ from metrics import http_errors_total, http_requests_total, ip_rotations_total
|
||||||
|
|
||||||
log = logging.getLogger("rightmove")
|
log = logging.getLogger("rightmove")
|
||||||
|
|
||||||
_ua = UserAgent(browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0)
|
_ua = UserAgent(
|
||||||
|
browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _endpoint_label(url: str) -> str:
|
def _endpoint_label(url: str) -> str:
|
||||||
|
|
@ -27,6 +29,7 @@ def _status_label(code: int) -> str:
|
||||||
return "5xx"
|
return "5xx"
|
||||||
return str(code)
|
return str(code)
|
||||||
|
|
||||||
|
|
||||||
# Gluetun control API — runs on port 8000 inside the gluetun container.
|
# Gluetun control API — runs on port 8000 inside the gluetun container.
|
||||||
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
|
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
|
||||||
GLUETUN_API = "http://127.0.0.1:8000"
|
GLUETUN_API = "http://127.0.0.1:8000"
|
||||||
|
|
@ -42,17 +45,25 @@ def rotate_ip() -> bool:
|
||||||
# Get current IP
|
# Get current IP
|
||||||
with httpx.Client(timeout=10) as ctl:
|
with httpx.Client(timeout=10) as ctl:
|
||||||
old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
|
old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
|
||||||
old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown"
|
old_ip = (
|
||||||
|
old_ip_resp.json().get("public_ip", "unknown")
|
||||||
|
if old_ip_resp.status_code == 200
|
||||||
|
else "unknown"
|
||||||
|
)
|
||||||
log.info("Current IP: %s", old_ip)
|
log.info("Current IP: %s", old_ip)
|
||||||
|
|
||||||
# Trigger server change — PUT with empty JSON body picks a random server
|
# Trigger server change — PUT with empty JSON body picks a random server
|
||||||
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"})
|
resp = ctl.put(
|
||||||
|
f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"}
|
||||||
|
)
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
|
log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
|
||||||
return False
|
return False
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"})
|
resp = ctl.put(
|
||||||
|
f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"}
|
||||||
|
)
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
|
log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
|
||||||
return False
|
return False
|
||||||
|
|
@ -99,7 +110,9 @@ def fetch_with_retry(
|
||||||
for attempt in range(MAX_RETRIES):
|
for attempt in range(MAX_RETRIES):
|
||||||
try:
|
try:
|
||||||
resp = client.get(url, params=params)
|
resp = client.get(url, params=params)
|
||||||
http_requests_total.labels(status=_status_label(resp.status_code), endpoint=endpoint).inc()
|
http_requests_total.labels(
|
||||||
|
status=_status_label(resp.status_code), endpoint=endpoint
|
||||||
|
).inc()
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
return resp.json()
|
return resp.json()
|
||||||
if resp.status_code == 403 and on_403:
|
if resp.status_code == 403 and on_403:
|
||||||
|
|
@ -111,15 +124,34 @@ def fetch_with_retry(
|
||||||
return None
|
return None
|
||||||
if resp.status_code in (429, 500, 502, 503, 504):
|
if resp.status_code in (429, 500, 502, 503, 504):
|
||||||
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
||||||
log.warning("HTTP %d from %s, retry %d/%d in %.1fs", resp.status_code, url, attempt + 1, MAX_RETRIES, delay)
|
log.warning(
|
||||||
|
"HTTP %d from %s, retry %d/%d in %.1fs",
|
||||||
|
resp.status_code,
|
||||||
|
url,
|
||||||
|
attempt + 1,
|
||||||
|
MAX_RETRIES,
|
||||||
|
delay,
|
||||||
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
continue
|
continue
|
||||||
log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
|
log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
|
||||||
return None
|
return None
|
||||||
except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
|
except (
|
||||||
|
httpx.ConnectError,
|
||||||
|
httpx.ReadTimeout,
|
||||||
|
httpx.WriteTimeout,
|
||||||
|
httpx.PoolTimeout,
|
||||||
|
) as e:
|
||||||
http_errors_total.labels(type=type(e).__name__).inc()
|
http_errors_total.labels(type=type(e).__name__).inc()
|
||||||
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
||||||
log.warning("%s from %s, retry %d/%d in %.1fs", type(e).__name__, url, attempt + 1, MAX_RETRIES, delay)
|
log.warning(
|
||||||
|
"%s from %s, retry %d/%d in %.1fs",
|
||||||
|
type(e).__name__,
|
||||||
|
url,
|
||||||
|
attempt + 1,
|
||||||
|
MAX_RETRIES,
|
||||||
|
delay,
|
||||||
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
http_errors_total.labels(type="retry_exhausted").inc()
|
http_errors_total.labels(type="retry_exhausted").inc()
|
||||||
log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
|
log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,14 @@ from pathlib import Path
|
||||||
from flask import Flask, Response, jsonify, send_from_directory
|
from flask import Flask, Response, jsonify, send_from_directory
|
||||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||||
|
|
||||||
from constants import DATA_DIR, RUN_ON_STARTUP, SCHEDULE_HOUR, SCRAPE_HOMECOUK, SCRAPE_OPENRENT, SCRAPE_RIGHTMOVE
|
from constants import (
|
||||||
|
DATA_DIR,
|
||||||
|
RUN_ON_STARTUP,
|
||||||
|
SCHEDULE_HOUR,
|
||||||
|
SCRAPE_HOMECOUK,
|
||||||
|
SCRAPE_OPENRENT,
|
||||||
|
SCRAPE_RIGHTMOVE,
|
||||||
|
)
|
||||||
from homecouk import load_cookies as load_homecouk_cookies
|
from homecouk import load_cookies as load_homecouk_cookies
|
||||||
from openrent import load_cookies as load_openrent_cookies
|
from openrent import load_cookies as load_openrent_cookies
|
||||||
from rightmove import outcode_cache
|
from rightmove import outcode_cache
|
||||||
|
|
@ -49,8 +56,13 @@ log.info("Loading arcgis data...")
|
||||||
OUTCODES = load_outcodes()
|
OUTCODES = load_outcodes()
|
||||||
PC_INDEX = build_postcode_index()
|
PC_INDEX = build_postcode_index()
|
||||||
PC_COORDS = build_postcode_coords() if SCRAPE_OPENRENT else None
|
PC_COORDS = build_postcode_coords() if SCRAPE_OPENRENT else None
|
||||||
log.info("Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s)",
|
log.info(
|
||||||
len(OUTCODES), SCRAPE_RIGHTMOVE, SCRAPE_HOMECOUK, SCRAPE_OPENRENT)
|
"Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s)",
|
||||||
|
len(OUTCODES),
|
||||||
|
SCRAPE_RIGHTMOVE,
|
||||||
|
SCRAPE_HOMECOUK,
|
||||||
|
SCRAPE_OPENRENT,
|
||||||
|
)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Scheduler
|
# Scheduler
|
||||||
|
|
@ -63,7 +75,9 @@ def _start_scrape() -> bool:
|
||||||
if status.state == "running":
|
if status.state == "running":
|
||||||
return False
|
return False
|
||||||
status.state = "running"
|
status.state = "running"
|
||||||
thread = threading.Thread(target=run_scrape, args=(OUTCODES, PC_INDEX, PC_COORDS), daemon=True)
|
thread = threading.Thread(
|
||||||
|
target=run_scrape, args=(OUTCODES, PC_INDEX, PC_COORDS), daemon=True
|
||||||
|
)
|
||||||
thread.start()
|
thread.start()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
@ -82,7 +96,9 @@ def _scheduler_loop() -> None:
|
||||||
log.info("Scheduler active — will run daily at %02d:00 UTC", SCHEDULE_HOUR)
|
log.info("Scheduler active — will run daily at %02d:00 UTC", SCHEDULE_HOUR)
|
||||||
while True:
|
while True:
|
||||||
wait = _seconds_until(SCHEDULE_HOUR)
|
wait = _seconds_until(SCHEDULE_HOUR)
|
||||||
log.info("Next scheduled scrape in %.0f seconds (%.1f hours)", wait, wait / 3600)
|
log.info(
|
||||||
|
"Next scheduled scrape in %.0f seconds (%.1f hours)", wait, wait / 3600
|
||||||
|
)
|
||||||
time.sleep(wait)
|
time.sleep(wait)
|
||||||
log.info("Scheduled scrape triggered")
|
log.info("Scheduled scrape triggered")
|
||||||
if not _start_scrape():
|
if not _start_scrape():
|
||||||
|
|
@ -144,15 +160,17 @@ def get_status():
|
||||||
def get_debug():
|
def get_debug():
|
||||||
hk_cookies = load_homecouk_cookies() if SCRAPE_HOMECOUK else None
|
hk_cookies = load_homecouk_cookies() if SCRAPE_HOMECOUK else None
|
||||||
or_cookies = load_openrent_cookies() if SCRAPE_OPENRENT else None
|
or_cookies = load_openrent_cookies() if SCRAPE_OPENRENT else None
|
||||||
return jsonify({
|
return jsonify(
|
||||||
"outcode_cache_size": len(outcode_cache),
|
{
|
||||||
"outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
|
"outcode_cache_size": len(outcode_cache),
|
||||||
"scrape_rightmove": SCRAPE_RIGHTMOVE,
|
"outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
|
||||||
"scrape_homecouk": SCRAPE_HOMECOUK,
|
"scrape_rightmove": SCRAPE_RIGHTMOVE,
|
||||||
"scrape_openrent": SCRAPE_OPENRENT,
|
"scrape_homecouk": SCRAPE_HOMECOUK,
|
||||||
"homecouk_cookies_available": hk_cookies is not None,
|
"scrape_openrent": SCRAPE_OPENRENT,
|
||||||
"openrent_cookies_available": or_cookies is not None,
|
"homecouk_cookies_available": hk_cookies is not None,
|
||||||
})
|
"openrent_cookies_available": or_cookies is not None,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/metrics")
|
@app.route("/metrics")
|
||||||
|
|
|
||||||
|
|
@ -79,7 +79,8 @@ def solve_waf() -> tuple[dict[str, str], str] | None:
|
||||||
if "AwsWafIntegration" in content:
|
if "AwsWafIntegration" in content:
|
||||||
log.info("Got WAF challenge page, waiting for resolution...")
|
log.info("Got WAF challenge page, waiting for resolution...")
|
||||||
page.wait_for_selector(
|
page.wait_for_selector(
|
||||||
"a.pli, .pli, .search-property-card", timeout=30000,
|
"a.pli, .pli, .search-property-card",
|
||||||
|
timeout=30000,
|
||||||
)
|
)
|
||||||
|
|
||||||
raw_cookies = context.cookies()
|
raw_cookies = context.cookies()
|
||||||
|
|
@ -94,7 +95,8 @@ def solve_waf() -> tuple[dict[str, str], str] | None:
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"AWS WAF solved — got %d cookies, UA: %s",
|
"AWS WAF solved — got %d cookies, UA: %s",
|
||||||
len(cookies), user_agent[:60],
|
len(cookies),
|
||||||
|
user_agent[:60],
|
||||||
)
|
)
|
||||||
flaresolverr_attempts_total.labels(result="success").inc()
|
flaresolverr_attempts_total.labels(result="success").inc()
|
||||||
return cookies, user_agent
|
return cookies, user_agent
|
||||||
|
|
@ -130,11 +132,13 @@ def make_client(cookies: dict[str, str], user_agent: str) -> Session:
|
||||||
"""Create a curl_cffi Session configured for OpenRent.
|
"""Create a curl_cffi Session configured for OpenRent.
|
||||||
Uses Chrome TLS impersonation so AWS WAF cookies remain valid."""
|
Uses Chrome TLS impersonation so AWS WAF cookies remain valid."""
|
||||||
session = Session(impersonate="chrome")
|
session = Session(impersonate="chrome")
|
||||||
session.headers.update({
|
session.headers.update(
|
||||||
"User-Agent": user_agent,
|
{
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
"User-Agent": user_agent,
|
||||||
"Accept-Language": "en-GB,en;q=0.9",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
})
|
"Accept-Language": "en-GB,en;q=0.9",
|
||||||
|
}
|
||||||
|
)
|
||||||
for name, value in cookies.items():
|
for name, value in cookies.items():
|
||||||
session.cookies.set(name, value, domain="openrent.co.uk")
|
session.cookies.set(name, value, domain="openrent.co.uk")
|
||||||
return session
|
return session
|
||||||
|
|
@ -152,7 +156,9 @@ def _status_label(code: int) -> str:
|
||||||
|
|
||||||
|
|
||||||
def fetch_page(
|
def fetch_page(
|
||||||
client: Session, url: str, max_retries: int = 3,
|
client: Session,
|
||||||
|
url: str,
|
||||||
|
max_retries: int = 3,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
"""GET HTML with retries on 429/5xx. Returns None on permanent failure.
|
"""GET HTML with retries on 429/5xx. Returns None on permanent failure.
|
||||||
WAF challenge (202 or 403 with challenge JS) raises WafChallengeError."""
|
WAF challenge (202 or 403 with challenge JS) raises WafChallengeError."""
|
||||||
|
|
@ -165,17 +171,25 @@ def fetch_page(
|
||||||
html = resp.text
|
html = resp.text
|
||||||
# Detect WAF challenge page masquerading as 200
|
# Detect WAF challenge page masquerading as 200
|
||||||
if "AwsWafIntegration" in html and "challenge.js" in html:
|
if "AwsWafIntegration" in html and "challenge.js" in html:
|
||||||
raise WafChallengeError("Got AWS WAF challenge page — cookies expired")
|
raise WafChallengeError(
|
||||||
|
"Got AWS WAF challenge page — cookies expired"
|
||||||
|
)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
if resp.status_code in (202, 403):
|
if resp.status_code in (202, 403):
|
||||||
raise WafChallengeError(f"HTTP {resp.status_code} — cookies likely expired")
|
raise WafChallengeError(
|
||||||
|
f"HTTP {resp.status_code} — cookies likely expired"
|
||||||
|
)
|
||||||
|
|
||||||
if resp.status_code in (429, 500, 502, 503, 504):
|
if resp.status_code in (429, 500, 502, 503, 504):
|
||||||
delay = RETRY_BASE_DELAY * (2 ** attempt)
|
delay = RETRY_BASE_DELAY * (2**attempt)
|
||||||
log.warning(
|
log.warning(
|
||||||
"HTTP %d from %s, retry %d/%d in %.1fs",
|
"HTTP %d from %s, retry %d/%d in %.1fs",
|
||||||
resp.status_code, url, attempt + 1, max_retries, delay,
|
resp.status_code,
|
||||||
|
url,
|
||||||
|
attempt + 1,
|
||||||
|
max_retries,
|
||||||
|
delay,
|
||||||
)
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
continue
|
continue
|
||||||
|
|
@ -187,10 +201,14 @@ def fetch_page(
|
||||||
raise
|
raise
|
||||||
except RequestsError as e:
|
except RequestsError as e:
|
||||||
openrent_errors_total.labels(type=type(e).__name__).inc()
|
openrent_errors_total.labels(type=type(e).__name__).inc()
|
||||||
delay = RETRY_BASE_DELAY * (2 ** attempt)
|
delay = RETRY_BASE_DELAY * (2**attempt)
|
||||||
log.warning(
|
log.warning(
|
||||||
"%s from %s, retry %d/%d in %.1fs",
|
"%s from %s, retry %d/%d in %.1fs",
|
||||||
type(e).__name__, url, attempt + 1, max_retries, delay,
|
type(e).__name__,
|
||||||
|
url,
|
||||||
|
attempt + 1,
|
||||||
|
max_retries,
|
||||||
|
delay,
|
||||||
)
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
|
|
||||||
|
|
@ -247,7 +265,9 @@ def _extract_bedrooms_from_title(title: str) -> int | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _extract_beds_baths_from_features(feature_items: list) -> tuple[int | None, int | None]:
|
def _extract_beds_baths_from_features(
|
||||||
|
feature_items: list,
|
||||||
|
) -> tuple[int | None, int | None]:
|
||||||
"""Extract bedrooms and bathrooms from feature list items.
|
"""Extract bedrooms and bathrooms from feature list items.
|
||||||
|
|
||||||
OpenRent search cards have <ul> with items like:
|
OpenRent search cards have <ul> with items like:
|
||||||
|
|
@ -442,11 +462,7 @@ def parse_search_results(html: str) -> list[dict]:
|
||||||
# --- Coordinates from data attributes (may not be present on cards) ---
|
# --- Coordinates from data attributes (may not be present on cards) ---
|
||||||
for el in [card] + card.select("[data-lat], [data-latitude]"):
|
for el in [card] + card.select("[data-lat], [data-latitude]"):
|
||||||
lat = el.get("data-lat") or el.get("data-latitude")
|
lat = el.get("data-lat") or el.get("data-latitude")
|
||||||
lng = (
|
lng = el.get("data-lng") or el.get("data-longitude") or el.get("data-lon")
|
||||||
el.get("data-lng")
|
|
||||||
or el.get("data-longitude")
|
|
||||||
or el.get("data-lon")
|
|
||||||
)
|
|
||||||
if lat and lng:
|
if lat and lng:
|
||||||
try:
|
try:
|
||||||
prop["lat"] = float(lat)
|
prop["lat"] = float(lat)
|
||||||
|
|
@ -543,9 +559,7 @@ def parse_property_detail(html: str) -> dict:
|
||||||
break
|
break
|
||||||
|
|
||||||
# --- Description for floor area ---
|
# --- Description for floor area ---
|
||||||
desc_el = soup.select_one(
|
desc_el = soup.select_one(".description, [class*='description'], #description")
|
||||||
".description, [class*='description'], #description"
|
|
||||||
)
|
|
||||||
if desc_el:
|
if desc_el:
|
||||||
details["description"] = desc_el.get_text(strip=True)
|
details["description"] = desc_el.get_text(strip=True)
|
||||||
|
|
||||||
|
|
@ -567,7 +581,12 @@ def map_property_type(raw_type: str | None) -> str:
|
||||||
lower = raw_type.lower()
|
lower = raw_type.lower()
|
||||||
if "room" in lower or "shared" in lower:
|
if "room" in lower or "shared" in lower:
|
||||||
return "Other"
|
return "Other"
|
||||||
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
|
if (
|
||||||
|
"flat" in lower
|
||||||
|
or "apartment" in lower
|
||||||
|
or "maisonette" in lower
|
||||||
|
or "studio" in lower
|
||||||
|
):
|
||||||
return "Flats/Maisonettes"
|
return "Flats/Maisonettes"
|
||||||
if "detached" in lower and "semi" not in lower:
|
if "detached" in lower and "semi" not in lower:
|
||||||
return "Detached"
|
return "Detached"
|
||||||
|
|
@ -647,7 +666,8 @@ def transform_property(
|
||||||
elif search_data.get("outcode"):
|
elif search_data.get("outcode"):
|
||||||
# No spatial index — try outcode lookup as fallback
|
# No spatial index — try outcode lookup as fallback
|
||||||
outcode_pcs = _resolve_outcode_postcodes(
|
outcode_pcs = _resolve_outcode_postcodes(
|
||||||
search_data["outcode"], pc_coords,
|
search_data["outcode"],
|
||||||
|
pc_coords,
|
||||||
)
|
)
|
||||||
if outcode_pcs:
|
if outcode_pcs:
|
||||||
postcode = outcode_pcs[0]
|
postcode = outcode_pcs[0]
|
||||||
|
|
@ -708,7 +728,8 @@ def transform_property(
|
||||||
|
|
||||||
prop_id = search_data.get("id", "")
|
prop_id = search_data.get("id", "")
|
||||||
listing_url = search_data.get(
|
listing_url = search_data.get(
|
||||||
"url", f"{OPENRENT_BASE}/{prop_id}" if prop_id else "",
|
"url",
|
||||||
|
f"{OPENRENT_BASE}/{prop_id}" if prop_id else "",
|
||||||
)
|
)
|
||||||
description = detail.get("description") or search_data.get("description", "")
|
description = detail.get("description") or search_data.get("description", "")
|
||||||
|
|
||||||
|
|
@ -775,7 +796,10 @@ def search_outcode(
|
||||||
time.sleep(DELAY_BETWEEN_PAGES * 0.5)
|
time.sleep(DELAY_BETWEEN_PAGES * 0.5)
|
||||||
|
|
||||||
transformed = transform_property(
|
transformed = transform_property(
|
||||||
search_data, detail_data, pc_index, pc_coords,
|
search_data,
|
||||||
|
detail_data,
|
||||||
|
pc_index,
|
||||||
|
pc_coords,
|
||||||
)
|
)
|
||||||
if transformed:
|
if transformed:
|
||||||
properties.append(transformed)
|
properties.append(transformed)
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,9 @@ def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
|
||||||
if outcode in outcode_cache:
|
if outcode in outcode_cache:
|
||||||
return outcode_cache[outcode]
|
return outcode_cache[outcode]
|
||||||
|
|
||||||
data = fetch_with_retry(client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"})
|
data = fetch_with_retry(
|
||||||
|
client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"}
|
||||||
|
)
|
||||||
if not data:
|
if not data:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -61,7 +63,12 @@ def search_outcode(
|
||||||
|
|
||||||
data = fetch_with_retry(client, SEARCH_URL, params)
|
data = fetch_with_retry(client, SEARCH_URL, params)
|
||||||
if not data:
|
if not data:
|
||||||
log.warning("Failed to fetch index %d for %s/%s", index, outcode, channel_cfg["channel"])
|
log.warning(
|
||||||
|
"Failed to fetch index %d for %s/%s",
|
||||||
|
index,
|
||||||
|
outcode,
|
||||||
|
channel_cfg["channel"],
|
||||||
|
)
|
||||||
break
|
break
|
||||||
|
|
||||||
raw_props = data.get("properties", [])
|
raw_props = data.get("properties", [])
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,16 @@ from dataclasses import dataclass, field
|
||||||
|
|
||||||
import polars as pl
|
import polars as pl
|
||||||
|
|
||||||
from constants import ARCGIS_PATH, CHANNELS, DATA_DIR, DELAY_BETWEEN_OUTCODES, SCRAPE_HOMECOUK, SCRAPE_OPENRENT, SCRAPE_RIGHTMOVE, SEED
|
from constants import (
|
||||||
|
ARCGIS_PATH,
|
||||||
|
CHANNELS,
|
||||||
|
DATA_DIR,
|
||||||
|
DELAY_BETWEEN_OUTCODES,
|
||||||
|
SCRAPE_HOMECOUK,
|
||||||
|
SCRAPE_OPENRENT,
|
||||||
|
SCRAPE_RIGHTMOVE,
|
||||||
|
SEED,
|
||||||
|
)
|
||||||
from homecouk import CookiesExpiredError
|
from homecouk import CookiesExpiredError
|
||||||
from homecouk import load_cookies as load_homecouk_cookies
|
from homecouk import load_cookies as load_homecouk_cookies
|
||||||
from homecouk import make_client as make_homecouk_client
|
from homecouk import make_client as make_homecouk_client
|
||||||
|
|
@ -64,13 +73,23 @@ def _sync_gauges() -> None:
|
||||||
scrape_outcodes_done.set(status.outcodes_done)
|
scrape_outcodes_done.set(status.outcodes_done)
|
||||||
scrape_outcodes_total.set(status.outcodes_total)
|
scrape_outcodes_total.set(status.outcodes_total)
|
||||||
# Total properties (both sources combined)
|
# Total properties (both sources combined)
|
||||||
scrape_properties_total.labels(channel="buy", source="total").set(status.properties_buy)
|
scrape_properties_total.labels(channel="buy", source="total").set(
|
||||||
scrape_properties_total.labels(channel="rent", source="total").set(status.properties_rent)
|
status.properties_buy
|
||||||
|
)
|
||||||
|
scrape_properties_total.labels(channel="rent", source="total").set(
|
||||||
|
status.properties_rent
|
||||||
|
)
|
||||||
# Per-source breakdown for current channel
|
# Per-source breakdown for current channel
|
||||||
ch = "buy" if status.channel == "BUY" else "rent"
|
ch = "buy" if status.channel == "BUY" else "rent"
|
||||||
scrape_properties_total.labels(channel=ch, source="rightmove").set(status.rm_properties)
|
scrape_properties_total.labels(channel=ch, source="rightmove").set(
|
||||||
scrape_properties_total.labels(channel=ch, source="homecouk").set(status.hk_properties)
|
status.rm_properties
|
||||||
scrape_properties_total.labels(channel=ch, source="openrent").set(status.or_properties)
|
)
|
||||||
|
scrape_properties_total.labels(channel=ch, source="homecouk").set(
|
||||||
|
status.hk_properties
|
||||||
|
)
|
||||||
|
scrape_properties_total.labels(channel=ch, source="openrent").set(
|
||||||
|
status.or_properties
|
||||||
|
)
|
||||||
if status.started_at:
|
if status.started_at:
|
||||||
end = status.finished_at if status.finished_at else time.time()
|
end = status.finished_at if status.finished_at else time.time()
|
||||||
scrape_elapsed_seconds.set(end - status.started_at)
|
scrape_elapsed_seconds.set(end - status.started_at)
|
||||||
|
|
@ -86,7 +105,9 @@ def load_outcodes() -> list[str]:
|
||||||
log.info("England postcodes: %d", len(england))
|
log.info("England postcodes: %d", len(england))
|
||||||
|
|
||||||
outcodes = (
|
outcodes = (
|
||||||
england.select(pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode"))
|
england.select(
|
||||||
|
pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode")
|
||||||
|
)
|
||||||
.drop_nulls()
|
.drop_nulls()
|
||||||
.get_column("outcode")
|
.get_column("outcode")
|
||||||
.unique()
|
.unique()
|
||||||
|
|
@ -101,7 +122,9 @@ def build_postcode_index() -> PostcodeSpatialIndex:
|
||||||
"""Build spatial index from arcgis England postcodes."""
|
"""Build spatial index from arcgis England postcodes."""
|
||||||
log.info("Building postcode spatial index from %s", ARCGIS_PATH)
|
log.info("Building postcode spatial index from %s", ARCGIS_PATH)
|
||||||
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
|
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
|
||||||
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(subset=["lat", "long"])
|
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(
|
||||||
|
subset=["lat", "long"]
|
||||||
|
)
|
||||||
return PostcodeSpatialIndex(
|
return PostcodeSpatialIndex(
|
||||||
england.get_column("lat").to_list(),
|
england.get_column("lat").to_list(),
|
||||||
england.get_column("long").to_list(),
|
england.get_column("long").to_list(),
|
||||||
|
|
@ -114,7 +137,9 @@ def build_postcode_coords() -> dict[str, tuple[float, float]]:
|
||||||
Used by OpenRent scraper to resolve coordinates from postcodes."""
|
Used by OpenRent scraper to resolve coordinates from postcodes."""
|
||||||
log.info("Building postcode coords lookup from %s", ARCGIS_PATH)
|
log.info("Building postcode coords lookup from %s", ARCGIS_PATH)
|
||||||
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
|
df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
|
||||||
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(subset=["lat", "long"])
|
england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(
|
||||||
|
subset=["lat", "long"]
|
||||||
|
)
|
||||||
coords: dict[str, tuple[float, float]] = {}
|
coords: dict[str, tuple[float, float]] = {}
|
||||||
for pcd, lat, lng in zip(
|
for pcd, lat, lng in zip(
|
||||||
england.get_column("pcd").to_list(),
|
england.get_column("pcd").to_list(),
|
||||||
|
|
@ -179,7 +204,9 @@ def run_scrape(
|
||||||
log.info("home.co.uk scraping ENABLED")
|
log.info("home.co.uk scraping ENABLED")
|
||||||
homecouk_enabled.set(1)
|
homecouk_enabled.set(1)
|
||||||
else:
|
else:
|
||||||
log.info("home.co.uk scraping DISABLED (need FlareSolverr or HOMECOUK_CF_CLEARANCE + HOMECOUK_SESSION)")
|
log.info(
|
||||||
|
"home.co.uk scraping DISABLED (need FlareSolverr or HOMECOUK_CF_CLEARANCE + HOMECOUK_SESSION)"
|
||||||
|
)
|
||||||
homecouk_enabled.set(0)
|
homecouk_enabled.set(0)
|
||||||
|
|
||||||
# OpenRent: must be enabled via SCRAPE_OPENRENT + cookies available
|
# OpenRent: must be enabled via SCRAPE_OPENRENT + cookies available
|
||||||
|
|
@ -195,7 +222,9 @@ def run_scrape(
|
||||||
log.info("OpenRent scraping ENABLED")
|
log.info("OpenRent scraping ENABLED")
|
||||||
openrent_enabled.set(1)
|
openrent_enabled.set(1)
|
||||||
else:
|
else:
|
||||||
log.info("OpenRent scraping DISABLED (need FlareSolverr or OPENRENT_WAF_TOKEN)")
|
log.info(
|
||||||
|
"OpenRent scraping DISABLED (need FlareSolverr or OPENRENT_WAF_TOKEN)"
|
||||||
|
)
|
||||||
openrent_enabled.set(0)
|
openrent_enabled.set(0)
|
||||||
|
|
||||||
# Build postcode coords if OpenRent is active and caller didn't provide them
|
# Build postcode coords if OpenRent is active and caller didn't provide them
|
||||||
|
|
@ -207,7 +236,9 @@ def run_scrape(
|
||||||
channel_name = channel_cfg["channel"]
|
channel_name = channel_cfg["channel"]
|
||||||
file_suffix = "buy" if channel_name == "BUY" else "rent"
|
file_suffix = "buy" if channel_name == "BUY" else "rent"
|
||||||
all_properties: dict[str, dict] = {} # dedup by id
|
all_properties: dict[str, dict] = {} # dedup by id
|
||||||
seen_dedup_keys: set[tuple] = set() # cross-source dedup by (postcode, beds, price)
|
seen_dedup_keys: set[tuple] = (
|
||||||
|
set()
|
||||||
|
) # cross-source dedup by (postcode, beds, price)
|
||||||
rm_count = 0 # Rightmove properties this channel
|
rm_count = 0 # Rightmove properties this channel
|
||||||
hk_count = 0 # home.co.uk properties this channel
|
hk_count = 0 # home.co.uk properties this channel
|
||||||
hk_dedup_count = 0 # home.co.uk skipped as cross-source duplicates
|
hk_dedup_count = 0 # home.co.uk skipped as cross-source duplicates
|
||||||
|
|
@ -222,15 +253,22 @@ def run_scrape(
|
||||||
status.hk_properties = 0
|
status.hk_properties = 0
|
||||||
status.or_properties = 0
|
status.or_properties = 0
|
||||||
|
|
||||||
log.info("=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled))
|
log.info(
|
||||||
|
"=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled)
|
||||||
|
)
|
||||||
|
|
||||||
for i, outcode in enumerate(shuffled):
|
for i, outcode in enumerate(shuffled):
|
||||||
with status_lock:
|
with status_lock:
|
||||||
status.outcode = outcode
|
status.outcode = outcode
|
||||||
status.outcodes_done = i
|
status.outcodes_done = i
|
||||||
|
|
||||||
log.debug("Outcode %s (%d/%d) — %d properties so far",
|
log.debug(
|
||||||
outcode, i + 1, len(shuffled), len(all_properties))
|
"Outcode %s (%d/%d) — %d properties so far",
|
||||||
|
outcode,
|
||||||
|
i + 1,
|
||||||
|
len(shuffled),
|
||||||
|
len(all_properties),
|
||||||
|
)
|
||||||
|
|
||||||
made_requests = False
|
made_requests = False
|
||||||
|
|
||||||
|
|
@ -240,9 +278,13 @@ def run_scrape(
|
||||||
try:
|
try:
|
||||||
outcode_id = resolve_outcode_id(client, outcode)
|
outcode_id = resolve_outcode_id(client, outcode)
|
||||||
if not outcode_id:
|
if not outcode_id:
|
||||||
log.debug("No Rightmove ID for outcode %s, skipping", outcode)
|
log.debug(
|
||||||
|
"No Rightmove ID for outcode %s, skipping", outcode
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
props = search_outcode(client, outcode_id, outcode, channel_cfg, pc_index)
|
props = search_outcode(
|
||||||
|
client, outcode_id, outcode, channel_cfg, pc_index
|
||||||
|
)
|
||||||
for p in props:
|
for p in props:
|
||||||
pid = p["id"]
|
pid = p["id"]
|
||||||
if pid not in all_properties:
|
if pid not in all_properties:
|
||||||
|
|
@ -261,7 +303,10 @@ def run_scrape(
|
||||||
made_requests = True
|
made_requests = True
|
||||||
try:
|
try:
|
||||||
hk_props = homecouk_search_outcode(
|
hk_props = homecouk_search_outcode(
|
||||||
hk_client, outcode, channel_name, pc_index,
|
hk_client,
|
||||||
|
outcode,
|
||||||
|
channel_name,
|
||||||
|
pc_index,
|
||||||
)
|
)
|
||||||
for p in hk_props:
|
for p in hk_props:
|
||||||
pid = p["id"]
|
pid = p["id"]
|
||||||
|
|
@ -276,9 +321,13 @@ def run_scrape(
|
||||||
seen_dedup_keys.add(key)
|
seen_dedup_keys.add(key)
|
||||||
hk_count += 1
|
hk_count += 1
|
||||||
if hk_props:
|
if hk_props:
|
||||||
log.info("home.co.uk %s: +%d properties", outcode, len(hk_props))
|
log.info(
|
||||||
|
"home.co.uk %s: +%d properties", outcode, len(hk_props)
|
||||||
|
)
|
||||||
except CookiesExpiredError:
|
except CookiesExpiredError:
|
||||||
log.warning("home.co.uk cookies expired — attempting refresh via FlareSolverr")
|
log.warning(
|
||||||
|
"home.co.uk cookies expired — attempting refresh via FlareSolverr"
|
||||||
|
)
|
||||||
hk_client.close()
|
hk_client.close()
|
||||||
hk_result = load_homecouk_cookies()
|
hk_result = load_homecouk_cookies()
|
||||||
if hk_result:
|
if hk_result:
|
||||||
|
|
@ -286,13 +335,17 @@ def run_scrape(
|
||||||
log.info("home.co.uk cookies refreshed, continuing")
|
log.info("home.co.uk cookies refreshed, continuing")
|
||||||
cookie_refreshes_total.labels(result="success").inc()
|
cookie_refreshes_total.labels(result="success").inc()
|
||||||
else:
|
else:
|
||||||
log.warning("Cookie refresh failed, disabling home.co.uk for rest of scrape")
|
log.warning(
|
||||||
|
"Cookie refresh failed, disabling home.co.uk for rest of scrape"
|
||||||
|
)
|
||||||
hk_client = None
|
hk_client = None
|
||||||
hk_failed = True
|
hk_failed = True
|
||||||
homecouk_enabled.set(0)
|
homecouk_enabled.set(0)
|
||||||
cookie_refreshes_total.labels(result="failure").inc()
|
cookie_refreshes_total.labels(result="failure").inc()
|
||||||
with status_lock:
|
with status_lock:
|
||||||
status.errors.append("home.co.uk cookies expired and refresh failed")
|
status.errors.append(
|
||||||
|
"home.co.uk cookies expired and refresh failed"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
msg = f"Error scraping home.co.uk {outcode}/{channel_name}: {e}"
|
msg = f"Error scraping home.co.uk {outcode}/{channel_name}: {e}"
|
||||||
log.error(msg)
|
log.error(msg)
|
||||||
|
|
@ -305,7 +358,10 @@ def run_scrape(
|
||||||
made_requests = True
|
made_requests = True
|
||||||
try:
|
try:
|
||||||
or_props = openrent_search_outcode(
|
or_props = openrent_search_outcode(
|
||||||
or_client, outcode, pc_index, pc_coords,
|
or_client,
|
||||||
|
outcode,
|
||||||
|
pc_index,
|
||||||
|
pc_coords,
|
||||||
)
|
)
|
||||||
for p in or_props:
|
for p in or_props:
|
||||||
pid = p["id"]
|
pid = p["id"]
|
||||||
|
|
@ -318,9 +374,13 @@ def run_scrape(
|
||||||
seen_dedup_keys.add(key)
|
seen_dedup_keys.add(key)
|
||||||
or_count += 1
|
or_count += 1
|
||||||
if or_props:
|
if or_props:
|
||||||
log.info("OpenRent %s: +%d properties", outcode, len(or_props))
|
log.info(
|
||||||
|
"OpenRent %s: +%d properties", outcode, len(or_props)
|
||||||
|
)
|
||||||
except WafChallengeError:
|
except WafChallengeError:
|
||||||
log.warning("OpenRent WAF cookies expired — attempting refresh via FlareSolverr")
|
log.warning(
|
||||||
|
"OpenRent WAF cookies expired — attempting refresh via FlareSolverr"
|
||||||
|
)
|
||||||
or_client.close()
|
or_client.close()
|
||||||
or_result = load_openrent_cookies()
|
or_result = load_openrent_cookies()
|
||||||
if or_result:
|
if or_result:
|
||||||
|
|
@ -328,13 +388,17 @@ def run_scrape(
|
||||||
log.info("OpenRent cookies refreshed, continuing")
|
log.info("OpenRent cookies refreshed, continuing")
|
||||||
cookie_refreshes_total.labels(result="success").inc()
|
cookie_refreshes_total.labels(result="success").inc()
|
||||||
else:
|
else:
|
||||||
log.warning("Cookie refresh failed, disabling OpenRent for rest of scrape")
|
log.warning(
|
||||||
|
"Cookie refresh failed, disabling OpenRent for rest of scrape"
|
||||||
|
)
|
||||||
or_client = None
|
or_client = None
|
||||||
or_failed = True
|
or_failed = True
|
||||||
openrent_enabled.set(0)
|
openrent_enabled.set(0)
|
||||||
cookie_refreshes_total.labels(result="failure").inc()
|
cookie_refreshes_total.labels(result="failure").inc()
|
||||||
with status_lock:
|
with status_lock:
|
||||||
status.errors.append("OpenRent WAF cookies expired and refresh failed")
|
status.errors.append(
|
||||||
|
"OpenRent WAF cookies expired and refresh failed"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
msg = f"Error scraping OpenRent {outcode}/{channel_name}: {e}"
|
msg = f"Error scraping OpenRent {outcode}/{channel_name}: {e}"
|
||||||
log.error(msg)
|
log.error(msg)
|
||||||
|
|
@ -352,8 +416,14 @@ def run_scrape(
|
||||||
status.or_properties = or_count
|
status.or_properties = or_count
|
||||||
_sync_gauges()
|
_sync_gauges()
|
||||||
|
|
||||||
log.info("Outcode %s: total %d (rm: %d, hk: %d, or: %d)",
|
log.info(
|
||||||
outcode, len(all_properties), rm_count, hk_count, or_count)
|
"Outcode %s: total %d (rm: %d, hk: %d, or: %d)",
|
||||||
|
outcode,
|
||||||
|
len(all_properties),
|
||||||
|
rm_count,
|
||||||
|
hk_count,
|
||||||
|
or_count,
|
||||||
|
)
|
||||||
|
|
||||||
if made_requests and i < len(shuffled) - 1:
|
if made_requests and i < len(shuffled) - 1:
|
||||||
time.sleep(DELAY_BETWEEN_OUTCODES)
|
time.sleep(DELAY_BETWEEN_OUTCODES)
|
||||||
|
|
@ -373,7 +443,11 @@ def run_scrape(
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"=== %s channel complete: %d unique (rm: %d, hk: %d, or: %d, cross-dedup: %d) ===",
|
"=== %s channel complete: %d unique (rm: %d, hk: %d, or: %d, cross-dedup: %d) ===",
|
||||||
channel_name, len(deduped), rm_count, hk_count, or_count,
|
channel_name,
|
||||||
|
len(deduped),
|
||||||
|
rm_count,
|
||||||
|
hk_count,
|
||||||
|
or_count,
|
||||||
hk_dedup_count + or_dedup_count,
|
hk_dedup_count + or_dedup_count,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -382,8 +456,12 @@ def run_scrape(
|
||||||
status.finished_at = time.time()
|
status.finished_at = time.time()
|
||||||
_sync_gauges()
|
_sync_gauges()
|
||||||
elapsed = status.finished_at - status.started_at
|
elapsed = status.finished_at - status.started_at
|
||||||
log.info("Scrape complete in %.0fs — buy: %d, rent: %d",
|
log.info(
|
||||||
elapsed, status.properties_buy, status.properties_rent)
|
"Scrape complete in %.0fs — buy: %d, rent: %d",
|
||||||
|
elapsed,
|
||||||
|
status.properties_buy,
|
||||||
|
status.properties_rent,
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception("Fatal scrape error")
|
log.exception("Fatal scrape error")
|
||||||
|
|
|
||||||
|
|
@ -11,12 +11,16 @@ class PostcodeSpatialIndex:
|
||||||
"""Grid-based spatial index over arcgis postcodes for nearest-lookup."""
|
"""Grid-based spatial index over arcgis postcodes for nearest-lookup."""
|
||||||
|
|
||||||
def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
|
def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
|
||||||
self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(list)
|
self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(
|
||||||
|
list
|
||||||
|
)
|
||||||
for lat, lng, pcd in zip(lats, lngs, postcodes):
|
for lat, lng, pcd in zip(lats, lngs, postcodes):
|
||||||
gx = int(math.floor(lng / GRID_CELL_SIZE))
|
gx = int(math.floor(lng / GRID_CELL_SIZE))
|
||||||
gy = int(math.floor(lat / GRID_CELL_SIZE))
|
gy = int(math.floor(lat / GRID_CELL_SIZE))
|
||||||
self.grid[(gx, gy)].append((lat, lng, pcd))
|
self.grid[(gx, gy)].append((lat, lng, pcd))
|
||||||
log.info("Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats))
|
log.info(
|
||||||
|
"Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats)
|
||||||
|
)
|
||||||
|
|
||||||
def nearest(self, lat: float, lng: float) -> str | None:
|
def nearest(self, lat: float, lng: float) -> str | None:
|
||||||
gx = int(math.floor(lng / GRID_CELL_SIZE))
|
gx = int(math.floor(lng / GRID_CELL_SIZE))
|
||||||
|
|
|
||||||
|
|
@ -60,9 +60,7 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
||||||
"Property type": [p["Property type"] for p in properties],
|
"Property type": [p["Property type"] for p in properties],
|
||||||
"Property sub-type": [p["Property sub-type"] for p in properties],
|
"Property sub-type": [p["Property sub-type"] for p in properties],
|
||||||
"Price qualifier": [p["Price qualifier"] for p in properties],
|
"Price qualifier": [p["Price qualifier"] for p in properties],
|
||||||
"Total floor area (sqm)": [
|
"Total floor area (sqm)": [p["Total floor area (sqm)"] for p in properties],
|
||||||
p["Total floor area (sqm)"] for p in properties
|
|
||||||
],
|
|
||||||
"Listing URL": [p["Listing URL"] for p in properties],
|
"Listing URL": [p["Listing URL"] for p in properties],
|
||||||
"Listing features": [p["Listing features"] for p in properties],
|
"Listing features": [p["Listing features"] for p in properties],
|
||||||
"Listing date": listing_dates,
|
"Listing date": listing_dates,
|
||||||
|
|
|
||||||
|
|
@ -51,9 +51,19 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]:
|
||||||
if 49 <= lat <= 56 and -7 <= lng <= 2:
|
if 49 <= lat <= 56 and -7 <= lng <= 2:
|
||||||
return lat, lng
|
return lat, lng
|
||||||
if 49 <= lng <= 56 and -7 <= lat <= 2:
|
if 49 <= lng <= 56 and -7 <= lat <= 2:
|
||||||
log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat)
|
log.debug(
|
||||||
|
"Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f",
|
||||||
|
lat,
|
||||||
|
lng,
|
||||||
|
lng,
|
||||||
|
lat,
|
||||||
|
)
|
||||||
return lng, lat
|
return lng, lat
|
||||||
log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng)
|
log.warning(
|
||||||
|
"Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f",
|
||||||
|
lat,
|
||||||
|
lng,
|
||||||
|
)
|
||||||
return lat, lng
|
return lat, lng
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -66,7 +76,9 @@ def normalize_price(amount: int, frequency: str) -> int:
|
||||||
return amount
|
return amount
|
||||||
|
|
||||||
|
|
||||||
def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None:
|
def transform_property(
|
||||||
|
prop: dict, outcode: str, pc_index: PostcodeSpatialIndex
|
||||||
|
) -> dict | None:
|
||||||
"""Transform a raw Rightmove property dict into our output schema."""
|
"""Transform a raw Rightmove property dict into our output schema."""
|
||||||
loc = prop.get("location")
|
loc = prop.get("location")
|
||||||
if not loc:
|
if not loc:
|
||||||
|
|
@ -86,13 +98,19 @@ def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex)
|
||||||
price = normalize_price(int(amount), frequency)
|
price = normalize_price(int(amount), frequency)
|
||||||
|
|
||||||
display_prices = price_obj.get("displayPrices", [])
|
display_prices = price_obj.get("displayPrices", [])
|
||||||
price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
|
price_qualifier = (
|
||||||
|
display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
|
||||||
|
)
|
||||||
|
|
||||||
sub_type = prop.get("propertySubType", "")
|
sub_type = prop.get("propertySubType", "")
|
||||||
bedrooms = prop.get("bedrooms", 0) or 0
|
bedrooms = prop.get("bedrooms", 0) or 0
|
||||||
bathrooms = prop.get("bathrooms", 0) or 0
|
bathrooms = prop.get("bathrooms", 0) or 0
|
||||||
|
|
||||||
key_features = [kf.get("description", "") for kf in prop.get("keyFeatures", []) if kf.get("description")]
|
key_features = [
|
||||||
|
kf.get("description", "")
|
||||||
|
for kf in prop.get("keyFeatures", [])
|
||||||
|
if kf.get("description")
|
||||||
|
]
|
||||||
|
|
||||||
postcode = pc_index.nearest(lat, lng)
|
postcode = pc_index.nearest(lat, lng)
|
||||||
if not postcode:
|
if not postcode:
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,9 @@ def download_and_convert(output_path: Path) -> None:
|
||||||
df = pl.concat(frames)
|
df = pl.concat(frames)
|
||||||
print(f"Total rows: {df.height}")
|
print(f"Total rows: {df.height}")
|
||||||
|
|
||||||
result = df.rename({"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}).with_columns(
|
result = df.rename(
|
||||||
|
{"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}
|
||||||
|
).with_columns(
|
||||||
pl.col("population").cast(pl.UInt32),
|
pl.col("population").cast(pl.UInt32),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -48,7 +50,9 @@ def download_and_convert(output_path: Path) -> None:
|
||||||
result = result.filter(pl.col("lsoa21").str.starts_with("E"))
|
result = result.filter(pl.col("lsoa21").str.starts_with("E"))
|
||||||
|
|
||||||
print(f"England LSOAs: {result.height}")
|
print(f"England LSOAs: {result.height}")
|
||||||
print(f"Population range: {result['population'].min()} - {result['population'].max()}")
|
print(
|
||||||
|
f"Population range: {result['population'].min()} - {result['population'].max()}"
|
||||||
|
)
|
||||||
print(f"Mean population: {result['population'].mean():.0f}")
|
print(f"Mean population: {result['population'].mean():.0f}")
|
||||||
|
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
|
||||||
|
|
@ -119,7 +119,11 @@ class PlaceHandler(osmium.SimpleHandler):
|
||||||
station_tag = tags.get("station", "")
|
station_tag = tags.get("station", "")
|
||||||
network = tags.get("network", "").lower()
|
network = tags.get("network", "").lower()
|
||||||
# Skip tram stops
|
# Skip tram stops
|
||||||
if station_tag == "light_rail" or "tramlink" in network or "tram" in network:
|
if (
|
||||||
|
station_tag == "light_rail"
|
||||||
|
or "tramlink" in network
|
||||||
|
or "tram" in network
|
||||||
|
):
|
||||||
return
|
return
|
||||||
display_name = _station_display_name(name, tags)
|
display_name = _station_display_name(name, tags)
|
||||||
self._add(display_name, "station", lat, lon, population)
|
self._add(display_name, "station", lat, lon, population)
|
||||||
|
|
@ -131,9 +135,7 @@ def main() -> None:
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output", type=Path, required=True, help="Output parquet file path"
|
"--output", type=Path, required=True, help="Output parquet file path"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument("--pbf", type=Path, required=True, help="Path to OSM PBF file")
|
||||||
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--boundary",
|
"--boundary",
|
||||||
type=Path,
|
type=Path,
|
||||||
|
|
|
||||||
|
|
@ -111,9 +111,7 @@ def main() -> None:
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output", type=Path, required=True, help="Output parquet file path"
|
"--output", type=Path, required=True, help="Output parquet file path"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument("--pbf", type=Path, required=True, help="Path to OSM PBF file")
|
||||||
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--boundary",
|
"--boundary",
|
||||||
type=Path,
|
type=Path,
|
||||||
|
|
|
||||||
|
|
@ -99,10 +99,14 @@ def convert_to_parquet(xls_path: Path, parquet_path: Path) -> None:
|
||||||
combined = pl.concat(frames)
|
combined = pl.concat(frames)
|
||||||
|
|
||||||
# Remap old LA codes to new unitary authority codes and average medians
|
# Remap old LA codes to new unitary authority codes and average medians
|
||||||
combined = combined.with_columns(
|
combined = (
|
||||||
pl.col("area_code").replace(LA_CONSOLIDATION),
|
combined.with_columns(
|
||||||
).group_by("area_code", "bedrooms").agg(
|
pl.col("area_code").replace(LA_CONSOLIDATION),
|
||||||
pl.col("median_monthly_rent").mean(),
|
)
|
||||||
|
.group_by("area_code", "bedrooms")
|
||||||
|
.agg(
|
||||||
|
pl.col("median_monthly_rent").mean(),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"Combined: {combined.shape}")
|
print(f"Combined: {combined.shape}")
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,7 @@ TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
|
||||||
|
|
||||||
def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
|
def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
|
||||||
df = pl.read_parquet(postcodes_path, columns=["Postcode"])
|
df = pl.read_parquet(postcodes_path, columns=["Postcode"])
|
||||||
outcodes = sorted(
|
outcodes = sorted(set(df["Postcode"].str.split(" ").list.first().to_list()) - {""})
|
||||||
set(df["Postcode"].str.split(" ").list.first().to_list()) - {""}
|
|
||||||
)
|
|
||||||
print(f"Querying Rightmove typeahead for {len(outcodes)} outcodes...")
|
print(f"Querying Rightmove typeahead for {len(outcodes)} outcodes...")
|
||||||
|
|
||||||
mapping: dict[str, str] = {}
|
mapping: dict[str, str] = {}
|
||||||
|
|
@ -28,11 +26,9 @@ def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
found = False
|
found = False
|
||||||
for m in data.get("matches", []):
|
for m in data.get("matches", []):
|
||||||
if (
|
if m["type"] == "OUTCODE" and m["displayName"].upper().replace(
|
||||||
m["type"] == "OUTCODE"
|
" ", ""
|
||||||
and m["displayName"].upper().replace(" ", "")
|
) == oc.upper().replace(" ", ""):
|
||||||
== oc.upper().replace(" ", "")
|
|
||||||
):
|
|
||||||
mapping[oc] = str(m["id"])
|
mapping[oc] = str(m["id"])
|
||||||
found = True
|
found = True
|
||||||
break
|
break
|
||||||
|
|
@ -57,9 +53,7 @@ def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(description="Fetch Rightmove outcode ID mapping")
|
||||||
description="Fetch Rightmove outcode ID mapping"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--postcodes", type=Path, required=True, help="postcode.parquet path"
|
"--postcodes", type=Path, required=True, help="postcode.parquet path"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,9 @@ def ensure_pmtiles_cli(bin_path: Path, version: str) -> None:
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description=__doc__)
|
parser = argparse.ArgumentParser(description=__doc__)
|
||||||
parser.add_argument("--output", type=Path, required=True, help="Output .pmtiles path")
|
parser.add_argument(
|
||||||
|
"--output", type=Path, required=True, help="Output .pmtiles path"
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--pmtiles-version", default="1.22.3", help="go-pmtiles release version"
|
"--pmtiles-version", default="1.22.3", help="go-pmtiles release version"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,9 @@ NR_TIMETABLE_URL = "https://opendata.nationalrail.co.uk/api/staticfeeds/3.0/time
|
||||||
USER_AGENT = "property-map-pipeline/1.0 (https://github.com)"
|
USER_AGENT = "property-map-pipeline/1.0 (https://github.com)"
|
||||||
|
|
||||||
|
|
||||||
def _download_http(url: str, dest: Path, *, desc: str, headers: dict | None = None) -> None:
|
def _download_http(
|
||||||
|
url: str, dest: Path, *, desc: str, headers: dict | None = None
|
||||||
|
) -> None:
|
||||||
"""Stream-download a URL to a file with progress bar."""
|
"""Stream-download a URL to a file with progress bar."""
|
||||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||||
tmp = dest.with_suffix(dest.suffix + ".tmp")
|
tmp = dest.with_suffix(dest.suffix + ".tmp")
|
||||||
|
|
@ -117,9 +119,10 @@ def clean_gtfs(src: Path, dst: Path) -> None:
|
||||||
return
|
return
|
||||||
|
|
||||||
print("Cleaning GTFS for R5 compatibility...")
|
print("Cleaning GTFS for R5 compatibility...")
|
||||||
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(
|
with (
|
||||||
dst, "w", zipfile.ZIP_DEFLATED
|
zipfile.ZipFile(src, "r") as zin,
|
||||||
) as zout:
|
zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
|
||||||
|
):
|
||||||
for info in zin.infolist():
|
for info in zin.infolist():
|
||||||
if info.filename == "stop_times.txt":
|
if info.filename == "stop_times.txt":
|
||||||
dropped = 0
|
dropped = 0
|
||||||
|
|
@ -127,7 +130,9 @@ def clean_gtfs(src: Path, dst: Path) -> None:
|
||||||
header = f.readline()
|
header = f.readline()
|
||||||
header_str = header.decode("utf-8").strip()
|
header_str = header.decode("utf-8").strip()
|
||||||
cols = header_str.split(",")
|
cols = header_str.split(",")
|
||||||
arr_idx = cols.index("arrival_time") if "arrival_time" in cols else -1
|
arr_idx = (
|
||||||
|
cols.index("arrival_time") if "arrival_time" in cols else -1
|
||||||
|
)
|
||||||
dep_idx = (
|
dep_idx = (
|
||||||
cols.index("departure_time") if "departure_time" in cols else -1
|
cols.index("departure_time") if "departure_time" in cols else -1
|
||||||
)
|
)
|
||||||
|
|
@ -179,7 +184,9 @@ def clean_gtfs(src: Path, dst: Path) -> None:
|
||||||
year = int(date_val[:4])
|
year = int(date_val[:4])
|
||||||
if year > 2100:
|
if year > 2100:
|
||||||
parts[i] = "20991231"
|
parts[i] = "20991231"
|
||||||
print(f" feed_info: capped end_date {date_val} → 20991231")
|
print(
|
||||||
|
f" feed_info: capped end_date {date_val} → 20991231"
|
||||||
|
)
|
||||||
fixed_lines.append(",".join(parts))
|
fixed_lines.append(",".join(parts))
|
||||||
zout.writestr("feed_info.txt", "\n".join(fixed_lines) + "\n")
|
zout.writestr("feed_info.txt", "\n".join(fixed_lines) + "\n")
|
||||||
else:
|
else:
|
||||||
|
|
@ -334,7 +341,9 @@ def convert_high_freq_to_frequency_based(
|
||||||
end_secs = trips[-1][1] + int(median_hw)
|
end_secs = trips[-1][1] + int(median_hw)
|
||||||
headway_rounded = max(60, round(median_hw / 60) * 60)
|
headway_rounded = max(60, round(median_hw / 60) * 60)
|
||||||
|
|
||||||
frequency_entries.append((template_trip_id, start_secs, end_secs, headway_rounded))
|
frequency_entries.append(
|
||||||
|
(template_trip_id, start_secs, end_secs, headway_rounded)
|
||||||
|
)
|
||||||
for trip_id, _ in trips[1:]:
|
for trip_id, _ in trips[1:]:
|
||||||
trips_to_remove.add(trip_id)
|
trips_to_remove.add(trip_id)
|
||||||
groups_converted += 1
|
groups_converted += 1
|
||||||
|
|
@ -344,9 +353,10 @@ def convert_high_freq_to_frequency_based(
|
||||||
print(f" Created {len(frequency_entries)} frequency entries")
|
print(f" Created {len(frequency_entries)} frequency entries")
|
||||||
|
|
||||||
# Step 5: Write modified GTFS
|
# Step 5: Write modified GTFS
|
||||||
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(
|
with (
|
||||||
dst, "w", zipfile.ZIP_DEFLATED
|
zipfile.ZipFile(src, "r") as zin,
|
||||||
) as zout:
|
zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
|
||||||
|
):
|
||||||
for info in zin.infolist():
|
for info in zin.infolist():
|
||||||
if info.filename == "trips.txt":
|
if info.filename == "trips.txt":
|
||||||
with zin.open(info) as f:
|
with zin.open(info) as f:
|
||||||
|
|
@ -466,15 +476,22 @@ def download_national_rail_cif(raw_dir: Path) -> Path | None:
|
||||||
email = os.environ.get("NATIONAL_RAIL_EMAIL")
|
email = os.environ.get("NATIONAL_RAIL_EMAIL")
|
||||||
password = os.environ.get("NATIONAL_RAIL_PASSWORD")
|
password = os.environ.get("NATIONAL_RAIL_PASSWORD")
|
||||||
if not email or not password:
|
if not email or not password:
|
||||||
print("Warning: NATIONAL_RAIL_EMAIL/NATIONAL_RAIL_PASSWORD not set, skipping national rail")
|
print(
|
||||||
|
"Warning: NATIONAL_RAIL_EMAIL/NATIONAL_RAIL_PASSWORD not set, skipping national rail"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
print("Authenticating with National Rail Open Data...")
|
print("Authenticating with National Rail Open Data...")
|
||||||
auth_data = urllib.parse.urlencode({"username": email, "password": password}).encode()
|
auth_data = urllib.parse.urlencode(
|
||||||
|
{"username": email, "password": password}
|
||||||
|
).encode()
|
||||||
auth_req = urllib.request.Request(
|
auth_req = urllib.request.Request(
|
||||||
NR_AUTH_URL,
|
NR_AUTH_URL,
|
||||||
data=auth_data,
|
data=auth_data,
|
||||||
headers={"User-Agent": USER_AGENT, "Content-Type": "application/x-www-form-urlencoded"},
|
headers={
|
||||||
|
"User-Agent": USER_AGENT,
|
||||||
|
"Content-Type": "application/x-www-form-urlencoded",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
with urllib.request.urlopen(auth_req) as resp:
|
with urllib.request.urlopen(auth_req) as resp:
|
||||||
token_data = json.loads(resp.read())
|
token_data = json.loads(resp.read())
|
||||||
|
|
@ -565,9 +582,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
|
||||||
coords_fixed = 0
|
coords_fixed = 0
|
||||||
route_types_fixed = 0
|
route_types_fixed = 0
|
||||||
|
|
||||||
with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(
|
with (
|
||||||
dst, "w", zipfile.ZIP_DEFLATED
|
zipfile.ZipFile(src, "r") as zin,
|
||||||
) as zout:
|
zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout,
|
||||||
|
):
|
||||||
for info in zin.infolist():
|
for info in zin.infolist():
|
||||||
# Skip non-standard links.txt
|
# Skip non-standard links.txt
|
||||||
if info.filename == "links.txt":
|
if info.filename == "links.txt":
|
||||||
|
|
@ -581,8 +599,12 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
|
||||||
trip_id_idx = cols.index("trip_id")
|
trip_id_idx = cols.index("trip_id")
|
||||||
stop_id_idx = cols.index("stop_id")
|
stop_id_idx = cols.index("stop_id")
|
||||||
seq_idx = cols.index("stop_sequence")
|
seq_idx = cols.index("stop_sequence")
|
||||||
pickup_idx = cols.index("pickup_type") if "pickup_type" in cols else -1
|
pickup_idx = (
|
||||||
dropoff_idx = cols.index("drop_off_type") if "drop_off_type" in cols else -1
|
cols.index("pickup_type") if "pickup_type" in cols else -1
|
||||||
|
)
|
||||||
|
dropoff_idx = (
|
||||||
|
cols.index("drop_off_type") if "drop_off_type" in cols else -1
|
||||||
|
)
|
||||||
|
|
||||||
tmp = tempfile.NamedTemporaryFile(
|
tmp = tempfile.NamedTemporaryFile(
|
||||||
mode="wb", delete=False, suffix=".txt"
|
mode="wb", delete=False, suffix=".txt"
|
||||||
|
|
@ -769,16 +791,27 @@ def _docker_run_dtd2mysql(
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Run dtd2mysql in a Node.js container on the same Docker network as MariaDB."""
|
"""Run dtd2mysql in a Node.js container on the same Docker network as MariaDB."""
|
||||||
cmd = [
|
cmd = [
|
||||||
"docker", "run", "--rm", "--network", network,
|
"docker",
|
||||||
"-e", f"DATABASE_HOSTNAME={db_container}",
|
"run",
|
||||||
"-e", "DATABASE_USERNAME=root",
|
"--rm",
|
||||||
"-e", "DATABASE_PASSWORD=root",
|
"--network",
|
||||||
"-e", "DATABASE_NAME=dtd",
|
network,
|
||||||
|
"-e",
|
||||||
|
f"DATABASE_HOSTNAME={db_container}",
|
||||||
|
"-e",
|
||||||
|
"DATABASE_USERNAME=root",
|
||||||
|
"-e",
|
||||||
|
"DATABASE_PASSWORD=root",
|
||||||
|
"-e",
|
||||||
|
"DATABASE_NAME=dtd",
|
||||||
]
|
]
|
||||||
for v in volumes:
|
for v in volumes:
|
||||||
cmd.extend(["-v", v])
|
cmd.extend(["-v", v])
|
||||||
# Install zip (needed for --gtfs-zip) then run dtd2mysql
|
# Install zip (needed for --gtfs-zip) then run dtd2mysql
|
||||||
inner = "apt-get update -qq && apt-get install -y -qq zip > /dev/null 2>&1 && npx --yes dtd2mysql " + " ".join(args)
|
inner = (
|
||||||
|
"apt-get update -qq && apt-get install -y -qq zip > /dev/null 2>&1 && npx --yes dtd2mysql "
|
||||||
|
+ " ".join(args)
|
||||||
|
)
|
||||||
cmd.extend(["node:20", "bash", "-c", inner])
|
cmd.extend(["node:20", "bash", "-c", inner])
|
||||||
subprocess.run(cmd, check=True)
|
subprocess.run(cmd, check=True)
|
||||||
|
|
||||||
|
|
@ -805,11 +838,17 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
|
||||||
subprocess.run(["docker", "network", "create", network], capture_output=True)
|
subprocess.run(["docker", "network", "create", network], capture_output=True)
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[
|
[
|
||||||
"docker", "run", "-d",
|
"docker",
|
||||||
"--name", db_container,
|
"run",
|
||||||
"--network", network,
|
"-d",
|
||||||
"-e", "MARIADB_ROOT_PASSWORD=root",
|
"--name",
|
||||||
"-e", "MARIADB_DATABASE=dtd",
|
db_container,
|
||||||
|
"--network",
|
||||||
|
network,
|
||||||
|
"-e",
|
||||||
|
"MARIADB_ROOT_PASSWORD=root",
|
||||||
|
"-e",
|
||||||
|
"MARIADB_DATABASE=dtd",
|
||||||
"mariadb:latest",
|
"mariadb:latest",
|
||||||
],
|
],
|
||||||
check=True,
|
check=True,
|
||||||
|
|
@ -820,7 +859,16 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
|
||||||
print(" Waiting for MariaDB to be ready...")
|
print(" Waiting for MariaDB to be ready...")
|
||||||
for attempt in range(30):
|
for attempt in range(30):
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
["docker", "exec", db_container, "mariadb", "-uroot", "-proot", "-e", "SELECT 1"],
|
[
|
||||||
|
"docker",
|
||||||
|
"exec",
|
||||||
|
db_container,
|
||||||
|
"mariadb",
|
||||||
|
"-uroot",
|
||||||
|
"-proot",
|
||||||
|
"-e",
|
||||||
|
"SELECT 1",
|
||||||
|
],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
)
|
)
|
||||||
if result.returncode == 0:
|
if result.returncode == 0:
|
||||||
|
|
@ -833,14 +881,16 @@ def convert_national_rail_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
|
||||||
|
|
||||||
print("Importing CIF timetable into MariaDB...")
|
print("Importing CIF timetable into MariaDB...")
|
||||||
_docker_run_dtd2mysql(
|
_docker_run_dtd2mysql(
|
||||||
network, db_container,
|
network,
|
||||||
|
db_container,
|
||||||
volumes=[f"{raw_abs}:/data:ro"],
|
volumes=[f"{raw_abs}:/data:ro"],
|
||||||
args=["--timetable", "/data/national_rail_cif.zip"],
|
args=["--timetable", "/data/national_rail_cif.zip"],
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Exporting GTFS from MariaDB...")
|
print("Exporting GTFS from MariaDB...")
|
||||||
_docker_run_dtd2mysql(
|
_docker_run_dtd2mysql(
|
||||||
network, db_container,
|
network,
|
||||||
|
db_container,
|
||||||
volumes=[f"{raw_abs}:/output"],
|
volumes=[f"{raw_abs}:/output"],
|
||||||
args=["--gtfs-zip", "/output/national_rail_gtfs_raw.zip"],
|
args=["--gtfs-zip", "/output/national_rail_gtfs_raw.zip"],
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -94,11 +94,18 @@ def _build(
|
||||||
|
|
||||||
# Remap terminated postcodes to nearest active successor
|
# Remap terminated postcodes to nearest active successor
|
||||||
postcode_mapping = build_postcode_mapping(arcgis_path)
|
postcode_mapping = build_postcode_mapping(arcgis_path)
|
||||||
wide = wide.join(
|
wide = (
|
||||||
postcode_mapping.lazy(), left_on="postcode", right_on="old_postcode", how="left"
|
wide.join(
|
||||||
).with_columns(
|
postcode_mapping.lazy(),
|
||||||
pl.coalesce("new_postcode", "postcode").alias("postcode"),
|
left_on="postcode",
|
||||||
).drop("new_postcode")
|
right_on="old_postcode",
|
||||||
|
how="left",
|
||||||
|
)
|
||||||
|
.with_columns(
|
||||||
|
pl.coalesce("new_postcode", "postcode").alias("postcode"),
|
||||||
|
)
|
||||||
|
.drop("new_postcode")
|
||||||
|
)
|
||||||
|
|
||||||
arcgis = (
|
arcgis = (
|
||||||
pl.scan_parquet(arcgis_path)
|
pl.scan_parquet(arcgis_path)
|
||||||
|
|
@ -252,16 +259,18 @@ def _build(
|
||||||
.otherwise(pl.col("pp_property_type"))
|
.otherwise(pl.col("pp_property_type"))
|
||||||
# Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes",
|
# Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes",
|
||||||
# collapse terrace sub-types, and fold rare types into "Other"
|
# collapse terrace sub-types, and fold rare types into "Other"
|
||||||
.replace({
|
.replace(
|
||||||
"Flat": "Flats/Maisonettes",
|
{
|
||||||
"Maisonette": "Flats/Maisonettes",
|
"Flat": "Flats/Maisonettes",
|
||||||
"End-Terrace": "Terraced",
|
"Maisonette": "Flats/Maisonettes",
|
||||||
"Mid-Terrace": "Terraced",
|
"End-Terrace": "Terraced",
|
||||||
"Enclosed End-Terrace": "Terraced",
|
"Mid-Terrace": "Terraced",
|
||||||
"Enclosed Mid-Terrace": "Terraced",
|
"Enclosed End-Terrace": "Terraced",
|
||||||
"Bungalow": "Other",
|
"Enclosed Mid-Terrace": "Terraced",
|
||||||
"Park home": "Other",
|
"Bungalow": "Other",
|
||||||
})
|
"Park home": "Other",
|
||||||
|
}
|
||||||
|
)
|
||||||
.alias("property_type")
|
.alias("property_type")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -426,10 +435,16 @@ def main():
|
||||||
help="Census 2021 population by LSOA parquet file",
|
help="Census 2021 population by LSOA parquet file",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path"
|
"--output-postcodes",
|
||||||
|
type=Path,
|
||||||
|
required=True,
|
||||||
|
help="Output postcode parquet file path",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output-properties", type=Path, required=True, help="Output properties parquet file path"
|
"--output-properties",
|
||||||
|
type=Path,
|
||||||
|
required=True,
|
||||||
|
help="Output properties parquet file path",
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -454,9 +454,7 @@ class TestFillHoles:
|
||||||
hole1 = [(10, 10), (20, 10), (20, 20), (10, 20), (10, 10)]
|
hole1 = [(10, 10), (20, 10), (20, 20), (10, 20), (10, 10)]
|
||||||
outer2 = [(60, 60), (110, 60), (110, 110), (60, 110), (60, 60)]
|
outer2 = [(60, 60), (110, 60), (110, 110), (60, 110), (60, 60)]
|
||||||
hole2 = [(70, 70), (80, 70), (80, 80), (70, 80), (70, 70)]
|
hole2 = [(70, 70), (80, 70), (80, 80), (70, 80), (70, 70)]
|
||||||
mp = MultiPolygon(
|
mp = MultiPolygon([Polygon(outer1, [hole1]), Polygon(outer2, [hole2])])
|
||||||
[Polygon(outer1, [hole1]), Polygon(outer2, [hole2])]
|
|
||||||
)
|
|
||||||
result = _fill_holes(mp)
|
result = _fill_holes(mp)
|
||||||
assert result.geom_type == "MultiPolygon"
|
assert result.geom_type == "MultiPolygon"
|
||||||
for p in result.geoms:
|
for p in result.geoms:
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,9 @@ def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
|
||||||
|
|
||||||
|
|
||||||
def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
|
def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
|
||||||
valid = np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
|
valid = (
|
||||||
|
np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
|
||||||
|
)
|
||||||
actual = actual[valid]
|
actual = actual[valid]
|
||||||
predicted = predicted[valid]
|
predicted = predicted[valid]
|
||||||
|
|
||||||
|
|
@ -176,7 +178,10 @@ def main():
|
||||||
"--input", type=Path, required=True, help="Path to properties.parquet"
|
"--input", type=Path, required=True, help="Path to properties.parquet"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--postcodes", type=Path, required=True, help="Path to postcode.parquet (for lat/lon)"
|
"--postcodes",
|
||||||
|
type=Path,
|
||||||
|
required=True,
|
||||||
|
help="Path to postcode.parquet (for lat/lon)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output", type=Path, required=True, help="Output backtest_results.parquet"
|
"--output", type=Path, required=True, help="Output backtest_results.parquet"
|
||||||
|
|
@ -185,7 +190,9 @@ def main():
|
||||||
|
|
||||||
# Build index from pre-test data only (temporal holdout)
|
# Build index from pre-test data only (temporal holdout)
|
||||||
print(f"Building price index (pairs with year2 < {TEST_YEAR_MIN})...")
|
print(f"Building price index (pairs with year2 < {TEST_YEAR_MIN})...")
|
||||||
index = build_index(args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes)
|
index = build_index(
|
||||||
|
args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes
|
||||||
|
)
|
||||||
print(
|
print(
|
||||||
f"\nHoldout index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
|
f"\nHoldout index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
|
||||||
f"{index['type_group'].n_unique()} type groups"
|
f"{index['type_group'].n_unique()} type groups"
|
||||||
|
|
@ -233,7 +240,9 @@ def main():
|
||||||
knn_est = knn_psm * fa * temporal_adj
|
knn_est = knn_psm * fa * temporal_adj
|
||||||
|
|
||||||
n_knn = int((np.isfinite(knn_est) & (knn_est > 0)).sum())
|
n_knn = int((np.isfinite(knn_est) & (knn_est > 0)).sum())
|
||||||
print(f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)")
|
print(
|
||||||
|
f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)"
|
||||||
|
)
|
||||||
|
|
||||||
# Blend: (1-w)*index + w*kNN where both available
|
# Blend: (1-w)*index + w*kNN where both available
|
||||||
index_est = test["predicted"].to_numpy().astype(np.float64)
|
index_est = test["predicted"].to_numpy().astype(np.float64)
|
||||||
|
|
|
||||||
|
|
@ -107,9 +107,7 @@ def main():
|
||||||
pl.when(has_price)
|
pl.when(has_price)
|
||||||
.then(
|
.then(
|
||||||
pl.col("Last known price").cast(pl.Float64)
|
pl.col("Last known price").cast(pl.Float64)
|
||||||
* (
|
* (pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp"))
|
||||||
pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp")
|
|
||||||
)
|
|
||||||
.clip(-MAX_LOG_ADJUSTMENT, MAX_LOG_ADJUSTMENT)
|
.clip(-MAX_LOG_ADJUSTMENT, MAX_LOG_ADJUSTMENT)
|
||||||
.exp()
|
.exp()
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -105,9 +105,7 @@ def extract_pairs(input_path: Path, max_year2: int | None = None) -> pl.DataFram
|
||||||
.alias("log_ratio"),
|
.alias("log_ratio"),
|
||||||
(
|
(
|
||||||
1.0
|
1.0
|
||||||
/ (pl.col("frac_year2") - pl.col("frac_year1"))
|
/ (pl.col("frac_year2") - pl.col("frac_year1")).cast(pl.Float64).sqrt()
|
||||||
.cast(pl.Float64)
|
|
||||||
.sqrt()
|
|
||||||
).alias("weight"),
|
).alias("weight"),
|
||||||
)
|
)
|
||||||
.filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
|
.filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
|
||||||
|
|
@ -453,8 +451,12 @@ def main():
|
||||||
description="Build improved repeat-sales price index"
|
description="Build improved repeat-sales price index"
|
||||||
)
|
)
|
||||||
parser.add_argument("--input", type=Path, required=True)
|
parser.add_argument("--input", type=Path, required=True)
|
||||||
parser.add_argument("--postcodes", type=Path, required=True,
|
parser.add_argument(
|
||||||
help="Path to postcode.parquet (for lat/lon centroids)")
|
"--postcodes",
|
||||||
|
type=Path,
|
||||||
|
required=True,
|
||||||
|
help="Path to postcode.parquet (for lat/lon centroids)",
|
||||||
|
)
|
||||||
parser.add_argument("--output", type=Path, required=True)
|
parser.add_argument("--output", type=Path, required=True)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,48 +43,39 @@ def build_knn_pool(
|
||||||
"""
|
"""
|
||||||
print("Building kNN pool...")
|
print("Building kNN pool...")
|
||||||
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
|
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
|
||||||
query = (
|
query = lf.select(
|
||||||
lf
|
"Postcode",
|
||||||
.select(
|
"Property type",
|
||||||
"Postcode",
|
"lat",
|
||||||
"Property type",
|
"lon",
|
||||||
"lat",
|
"Total floor area (sqm)",
|
||||||
"lon",
|
"Last known price",
|
||||||
"Total floor area (sqm)",
|
"Date of last transaction",
|
||||||
"Last known price",
|
).filter(
|
||||||
"Date of last transaction",
|
pl.col("lat").is_not_null(),
|
||||||
)
|
pl.col("lon").is_not_null(),
|
||||||
.filter(
|
pl.col("Total floor area (sqm)").is_not_null(),
|
||||||
pl.col("lat").is_not_null(),
|
pl.col("Total floor area (sqm)") > 0,
|
||||||
pl.col("lon").is_not_null(),
|
pl.col("Last known price").is_not_null(),
|
||||||
pl.col("Total floor area (sqm)").is_not_null(),
|
pl.col("Last known price") > 0,
|
||||||
pl.col("Total floor area (sqm)") > 0,
|
pl.col("Postcode").is_not_null(),
|
||||||
pl.col("Last known price").is_not_null(),
|
pl.col("Date of last transaction").is_not_null(),
|
||||||
pl.col("Last known price") > 0,
|
|
||||||
pl.col("Postcode").is_not_null(),
|
|
||||||
pl.col("Date of last transaction").is_not_null(),
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
if max_sale_year is not None:
|
if max_sale_year is not None:
|
||||||
query = query.filter(
|
query = query.filter(
|
||||||
pl.col("Date of last transaction").dt.year() < max_sale_year
|
pl.col("Date of last transaction").dt.year() < max_sale_year
|
||||||
)
|
)
|
||||||
|
|
||||||
pool = (
|
pool = query.with_columns(
|
||||||
query.with_columns(
|
sector_expr(),
|
||||||
sector_expr(),
|
type_group_expr(),
|
||||||
type_group_expr(),
|
(
|
||||||
(
|
pl.col("Date of last transaction").dt.year().cast(pl.Float64)
|
||||||
pl.col("Date of last transaction").dt.year().cast(pl.Float64)
|
+ (pl.col("Date of last transaction").dt.month().cast(pl.Float64) - 1.0)
|
||||||
+ (
|
/ 12.0
|
||||||
pl.col("Date of last transaction").dt.month().cast(pl.Float64)
|
).alias("_sale_fy"),
|
||||||
- 1.0
|
pl.lit(ref_frac_year).alias("_ref_fy"),
|
||||||
)
|
).collect()
|
||||||
/ 12.0
|
|
||||||
).alias("_sale_fy"),
|
|
||||||
pl.lit(ref_frac_year).alias("_ref_fy"),
|
|
||||||
).collect()
|
|
||||||
)
|
|
||||||
pool = pool.filter(pl.col("type_group").is_not_null())
|
pool = pool.filter(pl.col("type_group").is_not_null())
|
||||||
print(f" {len(pool):,} pool properties with lat/lon, floor area, price")
|
print(f" {len(pool):,} pool properties with lat/lon, floor area, price")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1085,7 +1085,9 @@ def transform(
|
||||||
if cat not in all_set:
|
if cat not in all_set:
|
||||||
mapped_but_absent.append(cat)
|
mapped_but_absent.append(cat)
|
||||||
if mapped_but_absent:
|
if mapped_but_absent:
|
||||||
print(f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}")
|
print(
|
||||||
|
f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}"
|
||||||
|
)
|
||||||
|
|
||||||
# Drop unwanted categories
|
# Drop unwanted categories
|
||||||
lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))
|
lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,9 @@ def load_england_polygon(geojson_path: Path) -> PreparedGeometry:
|
||||||
return prep(geometry)
|
return prep(geometry)
|
||||||
|
|
||||||
|
|
||||||
def in_england_mask(geojson_path: Path, lats: np.ndarray, lngs: np.ndarray) -> np.ndarray:
|
def in_england_mask(
|
||||||
|
geojson_path: Path, lats: np.ndarray, lngs: np.ndarray
|
||||||
|
) -> np.ndarray:
|
||||||
"""Vectorized check: which (lat, lng) points are within England.
|
"""Vectorized check: which (lat, lng) points are within England.
|
||||||
|
|
||||||
Returns a boolean numpy array.
|
Returns a boolean numpy array.
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,9 @@ def count_pois_per_postcode(
|
||||||
if nearby is None:
|
if nearby is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
|
distances = haversine_km(
|
||||||
|
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
|
||||||
|
)
|
||||||
|
|
||||||
within_mask = distances <= radius_km
|
within_mask = distances <= radius_km
|
||||||
within_indices = nearby[within_mask]
|
within_indices = nearby[within_mask]
|
||||||
|
|
@ -179,7 +181,9 @@ def min_distance_per_postcode(
|
||||||
if nearby is None:
|
if nearby is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
|
distances = haversine_km(
|
||||||
|
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
|
||||||
|
)
|
||||||
|
|
||||||
for group, cat_mask in category_masks.items():
|
for group, cat_mask in category_masks.items():
|
||||||
group_mask = cat_mask[nearby]
|
group_mask = cat_mask[nearby]
|
||||||
|
|
|
||||||
|
|
@ -15,26 +15,49 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
|
||||||
"""
|
"""
|
||||||
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
|
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
|
||||||
|
|
||||||
active = arcgis.filter(pl.col("doterm").is_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
active = (
|
||||||
terminated = arcgis.filter(pl.col("doterm").is_not_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
arcgis.filter(pl.col("doterm").is_null())
|
||||||
|
.select("pcds", "oseast1m", "osnrth1m")
|
||||||
|
.collect()
|
||||||
|
)
|
||||||
|
terminated = (
|
||||||
|
arcgis.filter(pl.col("doterm").is_not_null())
|
||||||
|
.select("pcds", "oseast1m", "osnrth1m")
|
||||||
|
.collect()
|
||||||
|
)
|
||||||
|
|
||||||
print(f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}")
|
print(
|
||||||
|
f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}"
|
||||||
|
)
|
||||||
|
|
||||||
if terminated.height == 0:
|
if terminated.height == 0:
|
||||||
return pl.DataFrame({"old_postcode": pl.Series([], dtype=pl.Utf8), "new_postcode": pl.Series([], dtype=pl.Utf8)})
|
return pl.DataFrame(
|
||||||
|
{
|
||||||
|
"old_postcode": pl.Series([], dtype=pl.Utf8),
|
||||||
|
"new_postcode": pl.Series([], dtype=pl.Utf8),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
active_coords = np.column_stack([active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()])
|
active_coords = np.column_stack(
|
||||||
terminated_coords = np.column_stack([terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()])
|
[active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]
|
||||||
|
)
|
||||||
|
terminated_coords = np.column_stack(
|
||||||
|
[terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]
|
||||||
|
)
|
||||||
|
|
||||||
tree = cKDTree(active_coords)
|
tree = cKDTree(active_coords)
|
||||||
distances, indices = tree.query(terminated_coords)
|
distances, indices = tree.query(terminated_coords)
|
||||||
|
|
||||||
active_postcodes = active["pcds"]
|
active_postcodes = active["pcds"]
|
||||||
mapping = pl.DataFrame({
|
mapping = pl.DataFrame(
|
||||||
"old_postcode": terminated["pcds"],
|
{
|
||||||
"new_postcode": active_postcodes.gather(indices),
|
"old_postcode": terminated["pcds"],
|
||||||
})
|
"new_postcode": active_postcodes.gather(indices),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
print(f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m")
|
print(
|
||||||
|
f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m"
|
||||||
|
)
|
||||||
|
|
||||||
return mapping
|
return mapping
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,9 @@ def test_no_pois_returns_zeros(postcodes):
|
||||||
"category": pl.Series([], dtype=pl.String),
|
"category": pl.Series([], dtype=pl.String),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
result = count_pois_per_postcode(postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0)
|
result = count_pois_per_postcode(
|
||||||
|
postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0
|
||||||
|
)
|
||||||
|
|
||||||
for group in POI_GROUPS:
|
for group in POI_GROUPS:
|
||||||
col = f"{group}_2km"
|
col = f"{group}_2km"
|
||||||
|
|
@ -125,7 +127,9 @@ def test_min_distance_no_pois_returns_nan(postcodes):
|
||||||
"category": pl.Series([], dtype=pl.String),
|
"category": pl.Series([], dtype=pl.String),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
result = min_distance_per_postcode(postcodes, empty_pois, groups={"train_tube": ["Rail station"]})
|
result = min_distance_per_postcode(
|
||||||
|
postcodes, empty_pois, groups={"train_tube": ["Rail station"]}
|
||||||
|
)
|
||||||
|
|
||||||
assert "train_tube_nearest_km" in result.columns
|
assert "train_tube_nearest_km" in result.columns
|
||||||
assert all(np.isnan(v) for v in result["train_tube_nearest_km"].to_list())
|
assert all(np.isnan(v) for v in result["train_tube_nearest_km"].to_list())
|
||||||
|
|
|
||||||
|
|
@ -28,8 +28,8 @@
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pd.set_option('display.max_columns', None)\n",
|
"pd.set_option(\"display.max_columns\", None)\n",
|
||||||
"pd.set_option('display.max_colwidth', 60)"
|
"pd.set_option(\"display.max_colwidth\", 60)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -47,7 +47,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"param_import_path = '/bulk/wide-2.parquet'\n",
|
"param_import_path = \"/bulk/wide-2.parquet\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"param_lookback = 3"
|
"param_lookback = 3"
|
||||||
]
|
]
|
||||||
|
|
@ -128,7 +128,7 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"data = pl.scan_parquet(param_import_path).unique(subset=[\"Postcode\", \"Address per EPC\"])\n",
|
"data = pl.scan_parquet(param_import_path).unique(subset=[\"Postcode\", \"Address per EPC\"])\n",
|
||||||
"data = data.filter(pl.col('Total floor area (sqm)') > 10)\n",
|
"data = data.filter(pl.col(\"Total floor area (sqm)\") > 10)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# print(data.collect_schema()) # column names and types\n",
|
"# print(data.collect_schema()) # column names and types\n",
|
||||||
"print(data.select(pl.len()).collect()) # row count\n",
|
"print(data.select(pl.len()).collect()) # row count\n",
|
||||||
|
|
@ -145,22 +145,20 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"columns_required = [\n",
|
"columns_required = [\n",
|
||||||
" # absolute neccesity \n",
|
" # absolute neccesity\n",
|
||||||
" 'Postcode',\n",
|
" \"Postcode\",\n",
|
||||||
" 'Address per EPC',\n",
|
" \"Address per EPC\",\n",
|
||||||
" 'historical_prices',\n",
|
" \"historical_prices\",\n",
|
||||||
" 'Price per sqm',\n",
|
" \"Price per sqm\",\n",
|
||||||
"\n",
|
|
||||||
" # faily fixed attributes\n",
|
" # faily fixed attributes\n",
|
||||||
" 'Property type', # or 'epc_property_type' or 'built_form'\n",
|
" \"Property type\", # or 'epc_property_type' or 'built_form'\n",
|
||||||
" 'Leashold/Freehold',\n",
|
" \"Leashold/Freehold\",\n",
|
||||||
" 'Total floor area (sqm)',\n",
|
" \"Total floor area (sqm)\",\n",
|
||||||
" 'Rooms (including bedrooms & bathrooms)',\n",
|
" \"Rooms (including bedrooms & bathrooms)\",\n",
|
||||||
" 'Approximate construction age',\n",
|
" \"Approximate construction age\",\n",
|
||||||
"\n",
|
|
||||||
" # latest\n",
|
" # latest\n",
|
||||||
" # 'date_of_transfer'\n",
|
" # 'date_of_transfer'\n",
|
||||||
" 'Last known price'\n",
|
" \"Last known price\",\n",
|
||||||
"]"
|
"]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -440,8 +438,13 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# temp_Postcodes = [\"LE5 4ED\", \"E14 9GU\", \"YO8 9PW\", \"SW1P 3AN\", \"BH3 7DX\", \"E14 2DG\"]\n",
|
"# temp_Postcodes = [\"LE5 4ED\", \"E14 9GU\", \"YO8 9PW\", \"SW1P 3AN\", \"BH3 7DX\", \"E14 2DG\"]\n",
|
||||||
"temp_Postcodes = data.select('Postcode').collect().sample(10000)['Postcode'].to_list()\n",
|
"temp_Postcodes = data.select(\"Postcode\").collect().sample(10000)[\"Postcode\"].to_list()\n",
|
||||||
"data_small = data.filter(pl.col(\"Postcode\").is_in(temp_Postcodes)).select(columns_required).collect().to_pandas()\n",
|
"data_small = (\n",
|
||||||
|
" data.filter(pl.col(\"Postcode\").is_in(temp_Postcodes))\n",
|
||||||
|
" .select(columns_required)\n",
|
||||||
|
" .collect()\n",
|
||||||
|
" .to_pandas()\n",
|
||||||
|
")\n",
|
||||||
"data_small = data_small.explode(\"historical_prices\")\n",
|
"data_small = data_small.explode(\"historical_prices\")\n",
|
||||||
"data_small[\"year\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"year\"])\n",
|
"data_small[\"year\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"year\"])\n",
|
||||||
"data_small[\"price\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"price\"])\n",
|
"data_small[\"price\"] = data_small[\"historical_prices\"].apply(lambda x: x[\"price\"])\n",
|
||||||
|
|
@ -458,7 +461,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# data_small[\n",
|
"# data_small[\n",
|
||||||
"# (data_small['Postcode'] == 'E14 2DG') \n",
|
"# (data_small['Postcode'] == 'E14 2DG')\n",
|
||||||
"# & data_small['epc_address'].str.contains('76')\n",
|
"# & data_small['epc_address'].str.contains('76')\n",
|
||||||
"# ]"
|
"# ]"
|
||||||
]
|
]
|
||||||
|
|
@ -908,35 +911,45 @@
|
||||||
"from typing import Any\n",
|
"from typing import Any\n",
|
||||||
"from pandas.core.frame import DataFrame\n",
|
"from pandas.core.frame import DataFrame\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(f'rolling periods (relative): {[i for i in range(-param_lookback, 1)]}')\n",
|
"print(f\"rolling periods (relative): {[i for i in range(-param_lookback, 1)]}\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Rolling average (±2 year), weighted by number of sales per year\n",
|
"# Rolling average (±2 year), weighted by number of sales per year\n",
|
||||||
"pc_avg_raw = data_small.groupby(['Postcode', 'year']).agg(\n",
|
"pc_avg_raw = (\n",
|
||||||
" ppsqm_sum=('Price per sqm', 'sum'),\n",
|
" data_small.groupby([\"Postcode\", \"year\"])\n",
|
||||||
" ppsqm_count=('Price per sqm', 'count')\n",
|
" .agg(ppsqm_sum=(\"Price per sqm\", \"sum\"), ppsqm_count=(\"Price per sqm\", \"count\"))\n",
|
||||||
").reset_index().sort_values(by=['Postcode', 'year'], ascending=False)\n",
|
" .reset_index()\n",
|
||||||
|
" .sort_values(by=[\"Postcode\", \"year\"], ascending=False)\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"display(pc_avg_raw)\n",
|
"display(pc_avg_raw)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Each year's totals contribute to year-1, year, and year+1\n",
|
"# Each year's totals contribute to year-1, year, and year+1\n",
|
||||||
"pc_avg_expanded = pd.concat([\n",
|
"pc_avg_expanded = pd.concat(\n",
|
||||||
" pc_avg_raw.assign(year=pc_avg_raw['year'] + offset) for offset in range(-param_lookback, 1) # \n",
|
" [\n",
|
||||||
"])\n",
|
" pc_avg_raw.assign(year=pc_avg_raw[\"year\"] + offset)\n",
|
||||||
|
" for offset in range(-param_lookback, 1) #\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"display(pc_avg_expanded)\n",
|
"display(pc_avg_expanded)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Sum counts and sums, then divide to get weighted mean\n",
|
"# Sum counts and sums, then divide to get weighted mean\n",
|
||||||
"pc_avg_complex = pc_avg_expanded.groupby(['Postcode', 'year']).agg(\n",
|
"pc_avg_complex = (\n",
|
||||||
" ppsqm_sum=('ppsqm_sum', 'sum'),\n",
|
" pc_avg_expanded.groupby([\"Postcode\", \"year\"])\n",
|
||||||
" ppsqm_count=('ppsqm_count', 'sum')\n",
|
" .agg(ppsqm_sum=(\"ppsqm_sum\", \"sum\"), ppsqm_count=(\"ppsqm_count\", \"sum\"))\n",
|
||||||
").reset_index()\n",
|
" .reset_index()\n",
|
||||||
"pc_avg_complex['Price per sqm PC AVG'] = pc_avg_complex['ppsqm_sum'] / pc_avg_complex['ppsqm_count']\n",
|
")\n",
|
||||||
"pc_avg_complex: Any | DataFrame = pc_avg_complex[['Postcode', 'year', 'Price per sqm PC AVG']].sort_values(by=['Postcode', 'year'], ascending=False)\n",
|
"pc_avg_complex[\"Price per sqm PC AVG\"] = (\n",
|
||||||
|
" pc_avg_complex[\"ppsqm_sum\"] / pc_avg_complex[\"ppsqm_count\"]\n",
|
||||||
|
")\n",
|
||||||
|
"pc_avg_complex: Any | DataFrame = pc_avg_complex[\n",
|
||||||
|
" [\"Postcode\", \"year\", \"Price per sqm PC AVG\"]\n",
|
||||||
|
"].sort_values(by=[\"Postcode\", \"year\"], ascending=False)\n",
|
||||||
"display(pc_avg_complex)\n",
|
"display(pc_avg_complex)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"temp_df = pc_avg_complex[pc_avg_complex['Postcode'] == data_small['Postcode'].iloc[0]]\n",
|
"temp_df = pc_avg_complex[pc_avg_complex[\"Postcode\"] == data_small[\"Postcode\"].iloc[0]]\n",
|
||||||
"print(data_small['Postcode'].iloc[0])\n",
|
"print(data_small[\"Postcode\"].iloc[0])\n",
|
||||||
"temp_df.plot.line(x='year', y='Price per sqm PC AVG')"
|
"temp_df.plot.line(x=\"year\", y=\"Price per sqm PC AVG\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -1111,9 +1124,13 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"data_small = data_small.merge(pc_avg_complex, on=['Postcode', 'year'], suffixes=('', ' pc_avg_complex'))\n",
|
"data_small = data_small.merge(\n",
|
||||||
"data_small['c'] = data_small['Price per sqm'] / data_small['Price per sqm PC AVG']\n",
|
" pc_avg_complex, on=[\"Postcode\", \"year\"], suffixes=(\"\", \" pc_avg_complex\")\n",
|
||||||
"data_small[['Postcode', 'Address per EPC', 'Price per sqm', 'Price per sqm PC AVG', 'c']]"
|
")\n",
|
||||||
|
"data_small[\"c\"] = data_small[\"Price per sqm\"] / data_small[\"Price per sqm PC AVG\"]\n",
|
||||||
|
"data_small[\n",
|
||||||
|
" [\"Postcode\", \"Address per EPC\", \"Price per sqm\", \"Price per sqm PC AVG\", \"c\"]\n",
|
||||||
|
"]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -1445,17 +1462,21 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n",
|
"# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n",
|
||||||
"c_stats = data_small.groupby(['Postcode', 'Address per EPC']).agg(\n",
|
"c_stats = (\n",
|
||||||
" n_sales=('c', 'count'),\n",
|
" data_small.groupby([\"Postcode\", \"Address per EPC\"])\n",
|
||||||
" year_min=('year', 'min'),\n",
|
" .agg(\n",
|
||||||
" year_max=('year', 'max'),\n",
|
" n_sales=(\"c\", \"count\"),\n",
|
||||||
" c_mean=('c', 'mean'),\n",
|
" year_min=(\"year\", \"min\"),\n",
|
||||||
" c_std=('c', 'std'),\n",
|
" year_max=(\"year\", \"max\"),\n",
|
||||||
").dropna()\n",
|
" c_mean=(\"c\", \"mean\"),\n",
|
||||||
"c_stats['c_cv'] = c_stats['c_std'] / c_stats['c_mean']\n",
|
" c_std=(\"c\", \"std\"),\n",
|
||||||
|
" )\n",
|
||||||
|
" .dropna()\n",
|
||||||
|
")\n",
|
||||||
|
"c_stats[\"c_cv\"] = c_stats[\"c_std\"] / c_stats[\"c_mean\"]\n",
|
||||||
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
|
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
|
||||||
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
|
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
|
||||||
"c_stats.sort_values('c_cv', ascending=False).head(20)"
|
"c_stats.sort_values(\"c_cv\", ascending=False).head(20)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -2265,42 +2286,44 @@
|
||||||
"display(random_c)\n",
|
"display(random_c)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# pc avg trend\n",
|
"# pc avg trend\n",
|
||||||
"temp_pc_avg = pc_avg_complex[pc_avg_complex['Postcode'] == random_c.index[0][0]].sort_values(by='year')\n",
|
"temp_pc_avg = pc_avg_complex[\n",
|
||||||
|
" pc_avg_complex[\"Postcode\"] == random_c.index[0][0]\n",
|
||||||
|
"].sort_values(by=\"year\")\n",
|
||||||
"display(temp_pc_avg)\n",
|
"display(temp_pc_avg)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# c for specific address\n",
|
"# c for specific address\n",
|
||||||
"temp_postcode = data_small[\n",
|
"temp_postcode = data_small[\n",
|
||||||
" (data_small['Postcode'] == random_c.index[0][0]) \n",
|
" (data_small[\"Postcode\"] == random_c.index[0][0])\n",
|
||||||
" # & (data_small['Address per EPC'] == random_c.index[0][1]) \n",
|
" # & (data_small['Address per EPC'] == random_c.index[0][1])\n",
|
||||||
"].sort_values(by='year')\n",
|
"].sort_values(by=\"year\")\n",
|
||||||
"display(temp_postcode)\n",
|
"display(temp_postcode)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"temp_address = data_small[\n",
|
"temp_address = data_small[\n",
|
||||||
" (data_small['Postcode'] == random_c.index[0][0]) \n",
|
" (data_small[\"Postcode\"] == random_c.index[0][0])\n",
|
||||||
" & (data_small['Address per EPC'] == random_c.index[0][1]) \n",
|
" & (data_small[\"Address per EPC\"] == random_c.index[0][1])\n",
|
||||||
"].sort_values(by='year')\n",
|
"].sort_values(by=\"year\")\n",
|
||||||
"display(temp_address)\n",
|
"display(temp_address)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# plot\n",
|
"# plot\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig, ax1 = plt.subplots()\n",
|
"fig, ax1 = plt.subplots()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"temp_pc_avg.plot.line(x='year', y='Price per sqm PC AVG', ax=ax1, color='black')\n",
|
"temp_pc_avg.plot.line(x=\"year\", y=\"Price per sqm PC AVG\", ax=ax1, color=\"black\")\n",
|
||||||
"temp_address.plot.line(x='year', y='Price per sqm', ax=ax1, color='green') \n",
|
"temp_address.plot.line(x=\"year\", y=\"Price per sqm\", ax=ax1, color=\"green\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ax2 = ax1.twinx()\n",
|
"ax2 = ax1.twinx()\n",
|
||||||
"ax2.set_ylim(0, 3)\n",
|
"ax2.set_ylim(0, 3)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for property in temp_postcode['Address per EPC'].unique():\n",
|
"for property in temp_postcode[\"Address per EPC\"].unique():\n",
|
||||||
" property_data = temp_postcode[temp_postcode['Address per EPC'] == property]\n",
|
" property_data = temp_postcode[temp_postcode[\"Address per EPC\"] == property]\n",
|
||||||
" property_data.plot.line(x='year', y='c', ax=ax2, color='orange', style=':')\n",
|
" property_data.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"orange\", style=\":\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"temp_address.plot.line(x='year', y='c', ax=ax2, color='red', style=':')\n",
|
"temp_address.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"red\", style=\":\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ax1.set_ylabel('Price per sqm')\n",
|
"ax1.set_ylabel(\"Price per sqm\")\n",
|
||||||
"ax2.set_ylabel('c')\n",
|
"ax2.set_ylabel(\"c\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.show()\n"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -2640,17 +2663,21 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n",
|
"# 1. Coefficient of Variation (std/mean) per property, filtered to 3+ sales\n",
|
||||||
"c_stats = data_small.groupby(['Postcode', 'Address per EPC']).agg(\n",
|
"c_stats = (\n",
|
||||||
" n_sales=('c', 'count'),\n",
|
" data_small.groupby([\"Postcode\", \"Address per EPC\"])\n",
|
||||||
" year_min=('year', 'min'),\n",
|
" .agg(\n",
|
||||||
" year_max=('year', 'max'),\n",
|
" n_sales=(\"c\", \"count\"),\n",
|
||||||
" c_mean=('c', 'mean'),\n",
|
" year_min=(\"year\", \"min\"),\n",
|
||||||
" c_std=('c', 'std'),\n",
|
" year_max=(\"year\", \"max\"),\n",
|
||||||
").dropna()\n",
|
" c_mean=(\"c\", \"mean\"),\n",
|
||||||
"c_stats['c_cv'] = c_stats['c_std'] / c_stats['c_mean']\n",
|
" c_std=(\"c\", \"std\"),\n",
|
||||||
|
" )\n",
|
||||||
|
" .dropna()\n",
|
||||||
|
")\n",
|
||||||
|
"c_stats[\"c_cv\"] = c_stats[\"c_std\"] / c_stats[\"c_mean\"]\n",
|
||||||
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
|
"# c_stats_3plus = c_stats[c_stats['n_sales'] >= 3]\n",
|
||||||
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
|
"# print(f\"Properties with 3+ sales: {len(c_stats_3plus)} / {len(c_stats)}\")\n",
|
||||||
"c_stats.sort_values('c_cv', ascending=False).head(20)"
|
"c_stats.sort_values(\"c_cv\", ascending=False).head(20)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -2685,31 +2712,41 @@
|
||||||
"\n",
|
"\n",
|
||||||
"fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
|
"fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"axes[0].hist(c_stats['c_std'], bins=100, edgecolor='black')\n",
|
"axes[0].hist(c_stats[\"c_std\"], bins=100, edgecolor=\"black\")\n",
|
||||||
"axes[0].set_xlabel('Std of c')\n",
|
"axes[0].set_xlabel(\"Std of c\")\n",
|
||||||
"axes[0].set_ylabel('Number of properties')\n",
|
"axes[0].set_ylabel(\"Number of properties\")\n",
|
||||||
"axes[0].set_title('Distribution of c stability (std)')\n",
|
"axes[0].set_title(\"Distribution of c stability (std)\")\n",
|
||||||
"axes[0].axvline(x=c_stats['c_std'].median(), color='red', linestyle='--', label=f'Median ({c_stats['c_std'].median()}) threshold')\n",
|
"axes[0].axvline(\n",
|
||||||
|
" x=c_stats[\"c_std\"].median(),\n",
|
||||||
|
" color=\"red\",\n",
|
||||||
|
" linestyle=\"--\",\n",
|
||||||
|
" label=f\"Median ({c_stats['c_std'].median()}) threshold\",\n",
|
||||||
|
")\n",
|
||||||
"axes[0].legend()\n",
|
"axes[0].legend()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"axes[1].hist(c_stats['c_cv'], bins=100, edgecolor='black')\n",
|
"axes[1].hist(c_stats[\"c_cv\"], bins=100, edgecolor=\"black\")\n",
|
||||||
"axes[1].set_xlabel('CV of c (std/mean)')\n",
|
"axes[1].set_xlabel(\"CV of c (std/mean)\")\n",
|
||||||
"axes[1].set_ylabel('Number of properties')\n",
|
"axes[1].set_ylabel(\"Number of properties\")\n",
|
||||||
"axes[1].set_title('Distribution of c stability (CV)')\n",
|
"axes[1].set_title(\"Distribution of c stability (CV)\")\n",
|
||||||
"axes[1].axvline(x=c_stats['c_cv'].median(), color='red', linestyle='--', label=f'Median ({c_stats['c_cv'].median()}) threshold')\n",
|
"axes[1].axvline(\n",
|
||||||
|
" x=c_stats[\"c_cv\"].median(),\n",
|
||||||
|
" color=\"red\",\n",
|
||||||
|
" linestyle=\"--\",\n",
|
||||||
|
" label=f\"Median ({c_stats['c_cv'].median()}) threshold\",\n",
|
||||||
|
")\n",
|
||||||
"axes[1].legend()\n",
|
"axes[1].legend()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.tight_layout()\n",
|
"plt.tight_layout()\n",
|
||||||
"plt.show()\n",
|
"plt.show()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# output text\n",
|
"# output text\n",
|
||||||
"pct_stable = (c_stats['c_cv'] < 0.1).mean() * 100\n",
|
"pct_stable = (c_stats[\"c_cv\"] < 0.1).mean() * 100\n",
|
||||||
"print(f\"{pct_stable:.1f}% of properties have CV < 0.1\")\n",
|
"print(f\"{pct_stable:.1f}% of properties have CV < 0.1\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pct_stable = (c_stats['c_cv'] < 0.2).mean() * 100\n",
|
"pct_stable = (c_stats[\"c_cv\"] < 0.2).mean() * 100\n",
|
||||||
"print(f\"{pct_stable:.1f}% of properties have CV < 0.2\")\n",
|
"print(f\"{pct_stable:.1f}% of properties have CV < 0.2\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pct_stable = (c_stats['c_cv'] < 0.3).mean() * 100\n",
|
"pct_stable = (c_stats[\"c_cv\"] < 0.3).mean() * 100\n",
|
||||||
"print(f\"{pct_stable:.1f}% of properties have CV < 0.3\")"
|
"print(f\"{pct_stable:.1f}% of properties have CV < 0.3\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -3299,7 +3336,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"unstable_c = c_stats.sort_values('c_cv', ascending=False)['c_cv'][:20]\n",
|
"unstable_c = c_stats.sort_values(\"c_cv\", ascending=False)[\"c_cv\"][:20]\n",
|
||||||
"display(unstable_c)\n",
|
"display(unstable_c)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"unstable_c_specific = random.randint(0, 20)\n",
|
"unstable_c_specific = random.randint(0, 20)\n",
|
||||||
|
|
@ -3308,41 +3345,43 @@
|
||||||
"print(unstable_c.index[unstable_c_specific][1])\n",
|
"print(unstable_c.index[unstable_c_specific][1])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# pc avg trend\n",
|
"# pc avg trend\n",
|
||||||
"temp_pc_avg = pc_avg_complex[pc_avg_complex['Postcode'] == unstable_c.index[unstable_c_specific][0]].sort_values(by='year')\n",
|
"temp_pc_avg = pc_avg_complex[\n",
|
||||||
|
" pc_avg_complex[\"Postcode\"] == unstable_c.index[unstable_c_specific][0]\n",
|
||||||
|
"].sort_values(by=\"year\")\n",
|
||||||
"display(temp_pc_avg)\n",
|
"display(temp_pc_avg)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# c for specific postcode\n",
|
"# c for specific postcode\n",
|
||||||
"temp_postcode = data_small[\n",
|
"temp_postcode = data_small[\n",
|
||||||
" (data_small['Postcode'] == unstable_c.index[unstable_c_specific][0]) \n",
|
" (data_small[\"Postcode\"] == unstable_c.index[unstable_c_specific][0])\n",
|
||||||
" # & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1]) \n",
|
" # & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1])\n",
|
||||||
"].sort_values(by='year')\n",
|
"].sort_values(by=\"year\")\n",
|
||||||
"display(temp_address)\n",
|
"display(temp_address)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# c for specific address\n",
|
"# c for specific address\n",
|
||||||
"temp_address = data_small[\n",
|
"temp_address = data_small[\n",
|
||||||
" (data_small['Postcode'] == unstable_c.index[unstable_c_specific][0]) \n",
|
" (data_small[\"Postcode\"] == unstable_c.index[unstable_c_specific][0])\n",
|
||||||
" & (data_small['Address per EPC'] == unstable_c.index[unstable_c_specific][1]) \n",
|
" & (data_small[\"Address per EPC\"] == unstable_c.index[unstable_c_specific][1])\n",
|
||||||
"].sort_values(by='year')\n",
|
"].sort_values(by=\"year\")\n",
|
||||||
"display(temp_address)\n",
|
"display(temp_address)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# plot\n",
|
"# plot\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig, ax1 = plt.subplots()\n",
|
"fig, ax1 = plt.subplots()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"temp_pc_avg.plot.line(x='year', y='Price per sqm PC AVG', ax=ax1, color='black')\n",
|
"temp_pc_avg.plot.line(x=\"year\", y=\"Price per sqm PC AVG\", ax=ax1, color=\"black\")\n",
|
||||||
"temp_address.plot.line(x='year', y='Price per sqm', ax=ax1, color='green') \n",
|
"temp_address.plot.line(x=\"year\", y=\"Price per sqm\", ax=ax1, color=\"green\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ax2 = ax1.twinx()\n",
|
"ax2 = ax1.twinx()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for property in temp_postcode['Address per EPC'].unique():\n",
|
"for property in temp_postcode[\"Address per EPC\"].unique():\n",
|
||||||
" property_data = temp_postcode[temp_postcode['Address per EPC'] == property]\n",
|
" property_data = temp_postcode[temp_postcode[\"Address per EPC\"] == property]\n",
|
||||||
" property_data.plot.line(x='year', y='c', ax=ax2, color='orange', style=':')\n",
|
" property_data.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"orange\", style=\":\")\n",
|
||||||
"temp_address.plot.line(x='year', y='c', ax=ax2, color='red', style=':')\n",
|
"temp_address.plot.line(x=\"year\", y=\"c\", ax=ax2, color=\"red\", style=\":\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ax1.set_ylabel('Price per sqm')\n",
|
"ax1.set_ylabel(\"Price per sqm\")\n",
|
||||||
"ax2.set_ylabel('c')\n",
|
"ax2.set_ylabel(\"c\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.show()\n"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -3370,11 +3409,11 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# select random address\n",
|
"# select random address\n",
|
||||||
"one_property = data_small.sample(1)[['Postcode', 'Address per EPC']].iloc[0]\n",
|
"one_property = data_small.sample(1)[[\"Postcode\", \"Address per EPC\"]].iloc[0]\n",
|
||||||
"postcode = one_property['Postcode']\n",
|
"postcode = one_property[\"Postcode\"]\n",
|
||||||
"address = one_property['Address per EPC']\n",
|
"address = one_property[\"Address per EPC\"]\n",
|
||||||
"print(f'Postcode: {postcode}')\n",
|
"print(f\"Postcode: {postcode}\")\n",
|
||||||
"print(f'Address: {address}')"
|
"print(f\"Address: {address}\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -3481,22 +3520,21 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"property_data = data_small[\n",
|
"property_data = data_small[\n",
|
||||||
" (data_small['Postcode'] == postcode) \n",
|
" (data_small[\"Postcode\"] == postcode) & (data_small[\"Address per EPC\"] == address)\n",
|
||||||
" & (data_small['Address per EPC'] == address) \n",
|
|
||||||
"]\n",
|
"]\n",
|
||||||
"latest_year = property_data['year'].max()\n",
|
"latest_year = property_data[\"year\"].max()\n",
|
||||||
"print(f'Latest year of data: {latest_year}')\n",
|
"print(f\"Latest year of data: {latest_year}\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Get only the latest year's data for this property (this is what we want to predict)\n",
|
"# Get only the latest year's data for this property (this is what we want to predict)\n",
|
||||||
"data_small_test = property_data[property_data['year'] == latest_year]\n",
|
"data_small_test = property_data[property_data[\"year\"] == latest_year]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Remove only the latest year's data from training (keep historical data for this property)\n",
|
"# Remove only the latest year's data from training (keep historical data for this property)\n",
|
||||||
"data_small_train = data_small.drop(data_small_test.index)\n",
|
"data_small_train = data_small.drop(data_small_test.index)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print()\n",
|
"print()\n",
|
||||||
"print(f'data_small.shape = {data_small.shape}')\n",
|
"print(f\"data_small.shape = {data_small.shape}\")\n",
|
||||||
"print(f'data_small_train.shape = {data_small_train.shape}')\n",
|
"print(f\"data_small_train.shape = {data_small_train.shape}\")\n",
|
||||||
"print(f'data_small_test.shape = {data_small_test.shape}')\n",
|
"print(f\"data_small_test.shape = {data_small_test.shape}\")\n",
|
||||||
"display(data_small_test)\n",
|
"display(data_small_test)\n",
|
||||||
"data_small.shape[0] == data_small_test.shape[0] + data_small_train.shape[0]"
|
"data_small.shape[0] == data_small_test.shape[0] + data_small_train.shape[0]"
|
||||||
]
|
]
|
||||||
|
|
@ -3607,9 +3645,9 @@
|
||||||
"# get latest c in data_small_train\n",
|
"# get latest c in data_small_train\n",
|
||||||
"\n",
|
"\n",
|
||||||
"latest_train_address = data_small_train[\n",
|
"latest_train_address = data_small_train[\n",
|
||||||
" (data_small_train['Postcode'] == postcode) \n",
|
" (data_small_train[\"Postcode\"] == postcode)\n",
|
||||||
" & (data_small_train['Address per EPC'] == address) \n",
|
" & (data_small_train[\"Address per EPC\"] == address)\n",
|
||||||
"].sort_values(by='year')\n",
|
"].sort_values(by=\"year\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"latest_train_address"
|
"latest_train_address"
|
||||||
]
|
]
|
||||||
|
|
@ -3630,10 +3668,10 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"latest_train_c = latest_train_address['c'].iloc[-1]\n",
|
"latest_train_c = latest_train_address[\"c\"].iloc[-1]\n",
|
||||||
"latest_train_pc_avg = latest_train_address['Price per sqm PC AVG'].iloc[-1]\n",
|
"latest_train_pc_avg = latest_train_address[\"Price per sqm PC AVG\"].iloc[-1]\n",
|
||||||
"print(f'Latest c in training data: {latest_train_c:.3f}')\n",
|
"print(f\"Latest c in training data: {latest_train_c:.3f}\")\n",
|
||||||
"print(f'Latest price per sqm in training data: {latest_train_pc_avg:.2f}') "
|
"print(f\"Latest price per sqm in training data: {latest_train_pc_avg:.2f}\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -3654,7 +3692,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"latest_train_c * latest_train_pc_avg * data_small_test['Total floor area (sqm)'].iloc[0]"
|
"latest_train_c * latest_train_pc_avg * data_small_test[\"Total floor area (sqm)\"].iloc[0]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import sys
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None):
|
def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None):
|
||||||
img = Image.open(path).convert("RGBA")
|
img = Image.open(path).convert("RGBA")
|
||||||
pixels = img.load()
|
pixels = img.load()
|
||||||
|
|
@ -43,6 +44,7 @@ def remove_white_bg(path: str, tolerance: int = 20, out: str | None = None):
|
||||||
img.save(dest)
|
img.save(dest)
|
||||||
print(f"Saved to {dest} ({img.size[0]}x{img.size[1]})")
|
print(f"Saved to {dest} ({img.size[0]}x{img.size[1]})")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print("Usage: python remove_bg.py <image> [tolerance] [output]")
|
print("Usage: python remove_bg.py <image> [tolerance] [output]")
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ clap = { version = "4", features = ["derive", "env"] }
|
||||||
axum = "0.8"
|
axum = "0.8"
|
||||||
tower-http = { version = "0.6", features = ["cors", "fs", "compression-gzip", "compression-zstd", "trace"] }
|
tower-http = { version = "0.6", features = ["cors", "fs", "compression-gzip", "compression-zstd", "trace"] }
|
||||||
tokio = { version = "1", features = ["full"] }
|
tokio = { version = "1", features = ["full"] }
|
||||||
polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16"] }
|
polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16", "round_series"] }
|
||||||
h3o = "0.7"
|
h3o = "0.7"
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
|
|
|
||||||
981
server-rs/logs/server.log.2026-03-15
Normal file
981
server-rs/logs/server.log.2026-03-15
Normal file
|
|
@ -0,0 +1,981 @@
|
||||||
|
2026-03-15T19:07:11.371851Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T19:07:11.372040Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T19:07:11.372050Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T19:07:11.473120Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T19:07:11.473130Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T19:07:22.441464Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T19:07:22.441476Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T19:07:22.848304Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T19:07:22.848315Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T19:07:22.913269Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T19:07:22.913279Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T19:07:33.981737Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T19:07:33.981832Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
|
||||||
|
2026-03-15T19:07:35.443457Z INFO property_map_server::data::property: Combined data selected rows=15702691
|
||||||
|
2026-03-15T19:07:35.594896Z INFO property_map_server::data::property: Extracting numeric feature columns
|
||||||
|
2026-03-15T19:07:36.004267Z INFO property_map_server::data::property: Computing histograms for numeric features
|
||||||
|
2026-03-15T19:07:37.616212Z INFO property_map_server::data::property: Extracting string columns
|
||||||
|
2026-03-15T19:07:39.772016Z INFO property_map_server::data::property: Building enum features
|
||||||
|
2026-03-15T19:07:41.167109Z INFO property_map_server::data::property: Extracting renovation history
|
||||||
|
2026-03-15T19:07:49.922117Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
|
||||||
|
2026-03-15T19:07:49.922128Z INFO property_map_server::data::property: Extracting listing features
|
||||||
|
2026-03-15T19:07:50.511333Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
|
||||||
|
2026-03-15T19:07:50.511342Z INFO property_map_server::data::property: Sorting rows by spatial locality
|
||||||
|
2026-03-15T19:07:51.475798Z INFO property_map_server::data::property: Building interned strings
|
||||||
|
2026-03-15T19:07:57.644399Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
|
||||||
|
2026-03-15T19:08:00.205219Z INFO property_map_server::data::property: Data loading complete
|
||||||
|
2026-03-15T19:08:01.951719Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
|
||||||
|
2026-03-15T19:08:01.951728Z INFO property_map_server: Building spatial grid index (0.01° cells)
|
||||||
|
2026-03-15T19:08:02.049262Z INFO property_map_server: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T19:08:02.049271Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T19:08:02.477049Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
|
||||||
|
2026-03-15T19:08:02.477606Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
|
||||||
|
2026-03-15T19:08:02.477614Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
|
||||||
|
2026-03-15T19:08:02.501985Z INFO property_map_server::data::poi: Loaded 678242 POIs
|
||||||
|
2026-03-15T19:08:02.612770Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
|
||||||
|
2026-03-15T19:08:02.613426Z INFO property_map_server::data::poi: POI data loading complete.
|
||||||
|
2026-03-15T19:08:02.652324Z INFO property_map_server: POI data loaded pois=678242
|
||||||
|
2026-03-15T19:08:02.652334Z INFO property_map_server: Building POI spatial grid index
|
||||||
|
2026-03-15T19:08:02.658355Z INFO property_map_server: Loading place data from /app/data/places.parquet
|
||||||
|
2026-03-15T19:08:02.658366Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
|
||||||
|
2026-03-15T19:08:02.660009Z INFO property_map_server::data::places: Loaded 3474 places
|
||||||
|
2026-03-15T19:08:02.660839Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
|
||||||
|
2026-03-15T19:08:02.660901Z INFO property_map_server: Place data loaded places=3474
|
||||||
|
2026-03-15T19:08:02.660910Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
|
||||||
|
2026-03-15T19:08:02.660914Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
|
||||||
|
2026-03-15T19:08:02.664216Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
|
||||||
|
2026-03-15T19:08:04.225988Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
|
||||||
|
2026-03-15T19:08:04.225999Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
|
||||||
|
2026-03-15T19:08:04.226018Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
|
||||||
|
2026-03-15T19:08:04.265284Z INFO property_map_server: PMTiles loaded successfully
|
||||||
|
2026-03-15T19:08:04.310151Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
|
||||||
|
2026-03-15T19:08:04.343659Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
|
||||||
|
2026-03-15T19:08:04.343821Z INFO property_map_server: Precomputed features response groups=9
|
||||||
|
2026-03-15T19:08:04.343836Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
|
||||||
|
2026-03-15T19:08:04.432857Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
|
||||||
|
2026-03-15T19:08:04.438796Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
|
||||||
|
2026-03-15T19:08:04.443153Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
|
||||||
|
2026-03-15T19:08:04.727071Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfectpostcodes.schmelczer.dev/pb
|
||||||
|
2026-03-15T19:08:04.742347Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
|
||||||
|
2026-03-15T19:08:04.742398Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
|
||||||
|
2026-03-15T19:08:04.742414Z INFO property_map_server: Loading travel time data from /app/data/travel-times
|
||||||
|
2026-03-15T19:08:04.783484Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
|
||||||
|
2026-03-15T19:08:04.784506Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
|
||||||
|
2026-03-15T19:08:04.785961Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
|
||||||
|
2026-03-15T19:08:04.794234Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
|
||||||
|
2026-03-15T19:08:04.794259Z INFO property_map_server: Travel time store loaded modes=4
|
||||||
|
2026-03-15T19:08:04.794312Z INFO property_map_server: Precomputed AI filters system prompt
|
||||||
|
2026-03-15T19:08:07.250052Z INFO property_map_server: All memory pages locked (mlockall)
|
||||||
|
2026-03-15T19:08:07.250147Z INFO property_map_server: Server listening on 0.0.0.0:8001
|
||||||
|
2026-03-15T19:08:43.332953Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=905 cells_after_filter=904 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=70.2 total_ms=82.4
|
||||||
|
2026-03-15T19:08:44.121660Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=68.3 total_ms=84.2
|
||||||
|
2026-03-15T19:08:45.206450Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=71.1 total_ms=94.8
|
||||||
|
2026-03-15T19:08:45.422613Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1356 cells_after_filter=1351 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1077.7 total_ms=1109.0
|
||||||
|
2026-03-15T19:08:47.208854Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=68.0 total_ms=79.9
|
||||||
|
2026-03-15T19:08:47.294458Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1356 cells_after_filter=1351 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1074.4 total_ms=1104.8
|
||||||
|
2026-03-15T19:08:48.178659Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=905 cells_after_filter=904 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=79.6 total_ms=86.6
|
||||||
|
2026-03-15T19:08:48.443812Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1337 cells_after_filter=1335 truncated=false bounds=50.5989,-1.9210,52.4537,1.7355 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=71.5 total_ms=83.5
|
||||||
|
2026-03-15T19:08:51.505423Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.0
|
||||||
|
2026-03-15T19:08:52.293946Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=55.5 total_ms=64.3
|
||||||
|
2026-03-15T19:09:12.606611Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=86195dac7ffffff resolution=6 total=602 returned=100 offset=0 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.4
|
||||||
|
2026-03-15T19:09:17.589934Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=25877 filters=1 filters_raw="Listing status:Historical sale" ms=12.0
|
||||||
|
2026-03-15T19:09:19.152498Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1021 cells_after_filter=1017 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=944.3 total_ms=960.2
|
||||||
|
2026-03-15T19:09:19.587280Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.0
|
||||||
|
2026-03-15T19:09:20.120819Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=59.6 total_ms=68.9
|
||||||
|
2026-03-15T19:09:21.757704Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=86195dac7ffffff resolution=6 total=602 returned=100 offset=0 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.3
|
||||||
|
2026-03-15T19:09:26.458314Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=25877 filters=1 filters_raw="Listing status:Historical sale" ms=12.2
|
||||||
|
2026-03-15T19:09:26.734420Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.1
|
||||||
|
2026-03-15T19:09:27.270779Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=59.2 total_ms=68.7
|
||||||
|
2026-03-15T19:09:27.465769Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=29 filters=1 filters_raw="Listing status:For rent" ms=1.5
|
||||||
|
2026-03-15T19:09:27.551401Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1021 cells_after_filter=1017 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=938.9 total_ms=955.1
|
||||||
|
2026-03-15T19:09:27.636702Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=688 cells_after_filter=688 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=64.2 total_ms=69.4
|
||||||
|
2026-03-15T19:09:28.107949Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=1.0
|
||||||
|
2026-03-15T19:09:28.647682Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=57.9 total_ms=66.8
|
||||||
|
2026-03-15T19:10:43.696300Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=29 filters=1 filters_raw="Listing status:For rent" ms=1.5
|
||||||
|
2026-03-15T19:10:43.917385Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=688 cells_after_filter=688 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=66.4 total_ms=72.3
|
||||||
|
2026-03-15T19:10:44.773099Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=86195dac7ffffff resolution=6 total_count=602 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" ms=0.9
|
||||||
|
2026-03-15T19:10:45.309165Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1004 cells_after_filter=1004 truncated=false bounds=50.5989,-1.3279,52.4537,1.1425 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=55.8 total_ms=65.3
|
||||||
|
2026-03-15T19:11:44.827084Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:11:45.019108Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:11:45.539093Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=67.3 total_ms=79.7
|
||||||
|
2026-03-15T19:12:22.900048Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1266.0 total_ms=1293.0
|
||||||
|
2026-03-15T19:12:43.487506Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=97.7 total_ms=119.2
|
||||||
|
2026-03-15T19:12:56.981906Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=92.3 total_ms=105.6
|
||||||
|
2026-03-15T19:15:16.007945Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T19:15:16.008103Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T19:15:16.008110Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T19:15:16.129677Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T19:15:16.129690Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T19:15:25.994671Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T19:15:25.994690Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T19:15:26.528002Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T19:15:26.528016Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T19:15:26.610377Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T19:15:26.610387Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T19:16:11.721573Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T19:16:11.721688Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
|
||||||
|
2026-03-15T19:16:13.315773Z INFO property_map_server::data::property: Combined data selected rows=15702691
|
||||||
|
2026-03-15T19:16:13.525913Z INFO property_map_server::data::property: Extracting numeric feature columns
|
||||||
|
2026-03-15T19:16:14.026360Z INFO property_map_server::data::property: Computing histograms for numeric features
|
||||||
|
2026-03-15T19:16:15.880642Z INFO property_map_server::data::property: Extracting string columns
|
||||||
|
2026-03-15T19:16:18.439507Z INFO property_map_server::data::property: Building enum features
|
||||||
|
2026-03-15T19:16:20.159326Z INFO property_map_server::data::property: Extracting renovation history
|
||||||
|
2026-03-15T19:16:22.764657Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
|
||||||
|
2026-03-15T19:16:22.764667Z INFO property_map_server::data::property: Extracting listing features
|
||||||
|
2026-03-15T19:16:23.416811Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
|
||||||
|
2026-03-15T19:16:23.416820Z INFO property_map_server::data::property: Sorting rows by spatial locality
|
||||||
|
2026-03-15T19:16:24.612917Z INFO property_map_server::data::property: Building interned strings
|
||||||
|
2026-03-15T19:16:31.709728Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
|
||||||
|
2026-03-15T19:16:34.550669Z INFO property_map_server::data::property: Data loading complete
|
||||||
|
2026-03-15T19:16:36.027023Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
|
||||||
|
2026-03-15T19:16:36.027032Z INFO property_map_server: Building spatial grid index (0.01° cells)
|
||||||
|
2026-03-15T19:16:36.427705Z INFO property_map_server: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T19:16:36.427713Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T19:16:36.848803Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
|
||||||
|
2026-03-15T19:16:36.848848Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
|
||||||
|
2026-03-15T19:16:36.848862Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
|
||||||
|
2026-03-15T19:16:36.922825Z INFO property_map_server::data::poi: Loaded 678242 POIs
|
||||||
|
2026-03-15T19:16:37.040700Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
|
||||||
|
2026-03-15T19:16:37.041316Z INFO property_map_server::data::poi: POI data loading complete.
|
||||||
|
2026-03-15T19:16:37.080709Z INFO property_map_server: POI data loaded pois=678242
|
||||||
|
2026-03-15T19:16:37.080719Z INFO property_map_server: Building POI spatial grid index
|
||||||
|
2026-03-15T19:16:37.087492Z INFO property_map_server: Loading place data from /app/data/places.parquet
|
||||||
|
2026-03-15T19:16:37.087505Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
|
||||||
|
2026-03-15T19:16:37.092797Z INFO property_map_server::data::places: Loaded 3474 places
|
||||||
|
2026-03-15T19:16:37.093630Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
|
||||||
|
2026-03-15T19:16:37.093696Z INFO property_map_server: Place data loaded places=3474
|
||||||
|
2026-03-15T19:16:37.093709Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
|
||||||
|
2026-03-15T19:16:37.093714Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
|
||||||
|
2026-03-15T19:16:37.097696Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
|
||||||
|
2026-03-15T19:16:40.166666Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
|
||||||
|
2026-03-15T19:16:40.166676Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
|
||||||
|
2026-03-15T19:16:40.166689Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
|
||||||
|
2026-03-15T19:16:40.223398Z INFO property_map_server: PMTiles loaded successfully
|
||||||
|
2026-03-15T19:16:40.267971Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
|
||||||
|
2026-03-15T19:16:40.329251Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
|
||||||
|
2026-03-15T19:16:40.329417Z INFO property_map_server: Precomputed features response groups=9
|
||||||
|
2026-03-15T19:16:40.329432Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
|
||||||
|
2026-03-15T19:16:40.466894Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
|
||||||
|
2026-03-15T19:16:40.474329Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
|
||||||
|
2026-03-15T19:16:40.488014Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
|
||||||
|
2026-03-15T19:16:40.724141Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfectpostcodes.schmelczer.dev/pb
|
||||||
|
2026-03-15T19:16:40.728811Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
|
||||||
|
2026-03-15T19:16:40.728841Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
|
||||||
|
2026-03-15T19:16:40.728857Z INFO property_map_server: Loading travel time data from /app/data/travel-times
|
||||||
|
2026-03-15T19:16:40.781239Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
|
||||||
|
2026-03-15T19:16:40.786720Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
|
||||||
|
2026-03-15T19:16:40.789828Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
|
||||||
|
2026-03-15T19:16:40.812650Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
|
||||||
|
2026-03-15T19:16:40.812688Z INFO property_map_server: Travel time store loaded modes=4
|
||||||
|
2026-03-15T19:16:40.812745Z INFO property_map_server: Precomputed AI filters system prompt
|
||||||
|
2026-03-15T19:16:44.806149Z INFO property_map_server: All memory pages locked (mlockall)
|
||||||
|
2026-03-15T19:16:44.806208Z INFO property_map_server: Server listening on 0.0.0.0:8001
|
||||||
|
2026-03-15T19:18:41.554399Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1102.3 total_ms=1142.0
|
||||||
|
2026-03-15T19:18:51.943051Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1089.3 total_ms=1111.0
|
||||||
|
2026-03-15T19:18:52.235271Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=88.8 total_ms=101.0
|
||||||
|
2026-03-15T19:18:53.034978Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=71.2 total_ms=79.2
|
||||||
|
2026-03-15T19:18:54.485285Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=86.6 total_ms=98.8
|
||||||
|
2026-03-15T19:18:56.331564Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1100.7 total_ms=1122.4
|
||||||
|
2026-03-15T19:18:57.013412Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=86.3 total_ms=98.3
|
||||||
|
2026-03-15T19:18:57.497646Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=80.4 total_ms=87.3
|
||||||
|
2026-03-15T19:18:58.307229Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=86.4 total_ms=98.6
|
||||||
|
2026-03-15T19:18:59.209272Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=92.3 total_ms=115.9
|
||||||
|
2026-03-15T19:18:59.605162Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1104.1 total_ms=1125.8
|
||||||
|
2026-03-15T19:19:00.641552Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=101.5 total_ms=114.0
|
||||||
|
2026-03-15T19:19:01.055691Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1102.8 total_ms=1133.7
|
||||||
|
2026-03-15T19:19:01.767817Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=70.5 total_ms=77.5
|
||||||
|
2026-03-15T19:19:02.094672Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=95.1 total_ms=107.1
|
||||||
|
2026-03-15T19:19:04.696909Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=1094.7 total_ms=1116.4
|
||||||
|
2026-03-15T19:20:18.135252Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=87.8 total_ms=99.9
|
||||||
|
2026-03-15T19:21:06.589853Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=70.6 total_ms=77.5
|
||||||
|
2026-03-15T19:22:21.723624Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T19:22:21.723777Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T19:22:21.723788Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T19:22:21.792919Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T19:22:21.792931Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T19:24:04.122070Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T19:24:04.122238Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T19:24:04.122243Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T19:24:04.183691Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T19:24:04.183700Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T19:24:10.703405Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T19:24:10.703567Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T19:24:10.703573Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T19:24:10.764393Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T19:24:10.764404Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T19:24:13.175087Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T19:24:13.175096Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T19:24:14.743132Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T19:24:14.743145Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T19:24:15.276707Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T19:24:15.276719Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T19:32:31.326505Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T19:32:31.326667Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T19:32:31.326674Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T19:32:31.468299Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T19:32:31.468311Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T19:32:36.199335Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T19:32:36.199348Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T19:32:36.594288Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T19:32:36.594299Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T19:32:36.659669Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T19:32:36.659679Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T19:33:03.760178Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T19:33:03.760265Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
|
||||||
|
2026-03-15T19:33:05.203275Z INFO property_map_server::data::property: Combined data selected rows=15702691
|
||||||
|
2026-03-15T19:33:05.385487Z INFO property_map_server::data::property: Extracting numeric feature columns
|
||||||
|
2026-03-15T19:33:05.825530Z INFO property_map_server::data::property: Computing histograms for numeric features
|
||||||
|
2026-03-15T19:33:07.452637Z INFO property_map_server::data::property: Extracting string columns
|
||||||
|
2026-03-15T19:33:09.737556Z INFO property_map_server::data::property: Building enum features
|
||||||
|
2026-03-15T19:33:11.190065Z INFO property_map_server::data::property: Extracting renovation history
|
||||||
|
2026-03-15T19:33:13.640495Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
|
||||||
|
2026-03-15T19:33:13.640506Z INFO property_map_server::data::property: Extracting listing features
|
||||||
|
2026-03-15T19:33:14.224090Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
|
||||||
|
2026-03-15T19:33:14.224101Z INFO property_map_server::data::property: Sorting rows by spatial locality
|
||||||
|
2026-03-15T19:33:15.218314Z INFO property_map_server::data::property: Building interned strings
|
||||||
|
2026-03-15T19:33:21.691381Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
|
||||||
|
2026-03-15T19:33:24.347401Z INFO property_map_server::data::property: Data loading complete
|
||||||
|
2026-03-15T19:33:26.411071Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
|
||||||
|
2026-03-15T19:33:26.411081Z INFO property_map_server: Building spatial grid index (0.01° cells)
|
||||||
|
2026-03-15T19:33:26.535487Z INFO property_map_server: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T19:33:26.535498Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T19:33:27.113000Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
|
||||||
|
2026-03-15T19:33:27.113028Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
|
||||||
|
2026-03-15T19:33:27.113036Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
|
||||||
|
2026-03-15T19:33:27.165121Z INFO property_map_server::data::poi: Loaded 678242 POIs
|
||||||
|
2026-03-15T19:33:27.304878Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
|
||||||
|
2026-03-15T19:33:27.305504Z INFO property_map_server::data::poi: POI data loading complete.
|
||||||
|
2026-03-15T19:33:27.346347Z INFO property_map_server: POI data loaded pois=678242
|
||||||
|
2026-03-15T19:33:27.346357Z INFO property_map_server: Building POI spatial grid index
|
||||||
|
2026-03-15T19:33:27.352391Z INFO property_map_server: Loading place data from /app/data/places.parquet
|
||||||
|
2026-03-15T19:33:27.352399Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
|
||||||
|
2026-03-15T19:33:27.365758Z INFO property_map_server::data::places: Loaded 3474 places
|
||||||
|
2026-03-15T19:33:27.366584Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
|
||||||
|
2026-03-15T19:33:27.366644Z INFO property_map_server: Place data loaded places=3474
|
||||||
|
2026-03-15T19:33:27.366655Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
|
||||||
|
2026-03-15T19:33:27.366659Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
|
||||||
|
2026-03-15T19:33:27.392761Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
|
||||||
|
2026-03-15T19:33:36.895174Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
|
||||||
|
2026-03-15T19:33:36.895188Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
|
||||||
|
2026-03-15T19:33:36.895208Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
|
||||||
|
2026-03-15T19:33:36.998297Z INFO property_map_server: PMTiles loaded successfully
|
||||||
|
2026-03-15T19:33:37.053356Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
|
||||||
|
2026-03-15T19:33:37.109062Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
|
||||||
|
2026-03-15T19:33:37.109355Z INFO property_map_server: Precomputed features response groups=9
|
||||||
|
2026-03-15T19:33:37.109374Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
|
||||||
|
2026-03-15T19:33:37.762412Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
|
||||||
|
2026-03-15T19:33:37.767896Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
|
||||||
|
2026-03-15T19:33:37.775340Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
|
||||||
|
2026-03-15T19:33:38.060153Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
|
||||||
|
2026-03-15T19:33:38.063925Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
|
||||||
|
2026-03-15T19:33:38.063950Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
|
||||||
|
2026-03-15T19:33:38.063967Z INFO property_map_server: Loading travel time data from /app/data/travel-times
|
||||||
|
2026-03-15T19:33:38.278834Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
|
||||||
|
2026-03-15T19:33:38.287416Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
|
||||||
|
2026-03-15T19:33:38.292977Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
|
||||||
|
2026-03-15T19:33:38.359034Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
|
||||||
|
2026-03-15T19:33:38.359070Z INFO property_map_server: Travel time store loaded modes=4
|
||||||
|
2026-03-15T19:33:38.359129Z INFO property_map_server: Precomputed AI filters system prompt
|
||||||
|
2026-03-15T19:33:48.937764Z INFO property_map_server: All memory pages locked (mlockall)
|
||||||
|
2026-03-15T19:33:48.937811Z INFO property_map_server: Server listening on 0.0.0.0:8001
|
||||||
|
2026-03-15T19:33:49.510480Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:33:49.711250Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:33:49.717966Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:33:49.758705Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:33:50.515563Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:33:50.515611Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:33:50.526782Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:33:50.526799Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:33:50.624015Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:33:50.954642Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:33:51.089161Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:33:51.089169Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:33:51.344202Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=148.7 total_ms=177.0
|
||||||
|
2026-03-15T19:34:49.486571Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:34:49.486761Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:34:50.105387Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=147.2 total_ms=181.6
|
||||||
|
2026-03-15T19:35:47.333901Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:35:47.633561Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:35:47.964989Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=127.2 total_ms=151.0
|
||||||
|
2026-03-15T19:36:10.914163Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:36:10.935314Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:36:11.591360Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated current price:23297.51:inf" travel_entries=0 agg_ms=148.2 total_ms=179.1
|
||||||
|
2026-03-15T19:37:10.487304Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For sale;;Asking price:23297.51:inf" travel_entries=0 agg_ms=30.4 total_ms=42.4
|
||||||
|
2026-03-15T19:37:11.520281Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=34.6 total_ms=41.1
|
||||||
|
2026-03-15T19:37:12.429971Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=92.6 total_ms=114.8
|
||||||
|
2026-03-15T19:39:01.324173Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=78.6 total_ms=100.6
|
||||||
|
2026-03-15T19:39:11.452418Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=34.6 total_ms=46.5
|
||||||
|
2026-03-15T19:39:12.258031Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=104.1 total_ms=126.5
|
||||||
|
2026-03-15T19:39:13.209101Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=30.2 total_ms=41.9
|
||||||
|
2026-03-15T19:39:13.542817Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=86.2 total_ms=108.5
|
||||||
|
2026-03-15T19:39:17.811115Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=78.0 total_ms=100.1
|
||||||
|
2026-03-15T19:39:18.543391Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=31.1 total_ms=42.7
|
||||||
|
2026-03-15T19:39:21.422553Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=99.7 total_ms=122.0
|
||||||
|
2026-03-15T19:39:24.089893Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=65.8 total_ms=87.6
|
||||||
|
2026-03-15T19:39:34.096698Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=878 cells_after_filter=877 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For rent;;Asking rent (monthly):300:7750" travel_entries=0 agg_ms=34.2 total_ms=40.6
|
||||||
|
2026-03-15T19:39:35.136941Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=77.3 total_ms=99.4
|
||||||
|
2026-03-15T19:39:35.997965Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=878 cells_after_filter=877 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For rent;;Asking rent (monthly):300:7750" travel_entries=0 agg_ms=27.5 total_ms=34.4
|
||||||
|
2026-03-15T19:39:36.896448Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=30.0 total_ms=43.4
|
||||||
|
2026-03-15T19:39:37.822906Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=97.3 total_ms=119.7
|
||||||
|
2026-03-15T19:39:38.005882Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=31.8 total_ms=43.6
|
||||||
|
2026-03-15T19:39:38.307634Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=32.0 total_ms=38.7
|
||||||
|
2026-03-15T19:39:39.034035Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=32.7 total_ms=44.5
|
||||||
|
2026-03-15T19:39:39.374475Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=100.4 total_ms=122.4
|
||||||
|
2026-03-15T19:39:39.415811Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=39.0 total_ms=50.9
|
||||||
|
2026-03-15T19:39:40.267881Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=28.4 total_ms=35.2
|
||||||
|
2026-03-15T19:39:40.542606Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=35.2 total_ms=47.1
|
||||||
|
2026-03-15T19:39:40.845586Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=88.6 total_ms=110.3
|
||||||
|
2026-03-15T19:39:42.206069Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=32.7 total_ms=44.7
|
||||||
|
2026-03-15T19:39:43.318640Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=27.1 total_ms=33.6
|
||||||
|
2026-03-15T19:39:44.688592Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=96.4 total_ms=117.7
|
||||||
|
2026-03-15T19:39:45.172008Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=29.2 total_ms=36.4
|
||||||
|
2026-03-15T19:39:45.850790Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=40.1 total_ms=51.9
|
||||||
|
2026-03-15T19:39:46.189922Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=29.0 total_ms=35.7
|
||||||
|
2026-03-15T19:39:46.998212Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=86.0 total_ms=108.9
|
||||||
|
2026-03-15T19:39:47.177336Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=29.1 total_ms=35.8
|
||||||
|
2026-03-15T19:39:47.696691Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=96.6 total_ms=118.0
|
||||||
|
2026-03-15T19:39:48.556326Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=911 cells_after_filter=910 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=28.2 total_ms=35.0
|
||||||
|
2026-03-15T19:39:48.906118Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=107.8 total_ms=129.7
|
||||||
|
2026-03-15T19:39:51.395259Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1374 cells_after_filter=1370 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:Historical sale;;Estimated monthly rent:300:7750" travel_entries=0 agg_ms=66.3 total_ms=88.4
|
||||||
|
2026-03-15T19:39:58.878189Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=878 cells_after_filter=877 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=2 filters_raw="Listing status:For rent;;Asking rent (monthly):300:7750" travel_entries=0 agg_ms=31.0 total_ms=37.7
|
||||||
|
2026-03-15T19:40:04.517960Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1349 cells_after_filter=1348 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=31.5 total_ms=44.3
|
||||||
|
2026-03-15T19:40:17.346685Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=96.9 total_ms=119.8
|
||||||
|
2026-03-15T19:47:52.591641Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T19:47:52.591925Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T19:47:53.240629Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=7065688 parallel=true cells_before_filter=1376 cells_after_filter=1372 truncated=false bounds=50.5887,-1.9407,52.4636,1.7553 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=95.9 total_ms=117.5
|
||||||
|
2026-03-15T19:48:12.279153Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=4253126 parallel=true cells_before_filter=3093 cells_after_filter=3064 truncated=false bounds=50.9497,-0.9772,51.9159,0.9233 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=54.4 total_ms=137.0
|
||||||
|
2026-03-15T19:48:13.512341Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=2883111 parallel=true cells_before_filter=1105 cells_after_filter=1088 truncated=false bounds=51.1146,-0.5552,51.6729,0.5420 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=36.5 total_ms=54.4
|
||||||
|
2026-03-15T19:48:14.110557Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=8 rows=1687410 parallel=true cells_before_filter=2479 cells_after_filter=2434 truncated=false bounds=51.2171,-0.3474,51.5584,0.3232 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=22.6 total_ms=61.3
|
||||||
|
2026-03-15T19:48:16.632555Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=1007854 parallel=true cells_before_filter=5324 cells_after_filter=5035 truncated=false bounds=51.3626,-0.1740,51.5564,0.2074 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=16.9 total_ms=117.8
|
||||||
|
2026-03-15T19:51:26.541773Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=190938 parallel=true cells_before_filter=1076 cells_after_filter=1033 truncated=false bounds=51.4310,-0.0323,51.5159,0.1349 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=3.8 total_ms=20.0
|
||||||
|
2026-03-15T19:51:30.533448Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=384437 parallel=true cells_before_filter=2704 cells_after_filter=2497 truncated=false bounds=51.4379,0.0032,51.5871,0.2970 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=7.4 total_ms=46.9
|
||||||
|
2026-03-15T19:51:31.069688Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=632131 parallel=true cells_before_filter=3388 cells_after_filter=3150 truncated=false bounds=51.4091,-0.1211,51.5583,0.1727 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=10.9 total_ms=60.6
|
||||||
|
2026-03-15T19:51:35.046811Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=65919 parallel=true cells_before_filter=341 cells_after_filter=288 truncated=false bounds=51.4524,-0.0211,51.4936,0.0599 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=6.2
|
||||||
|
2026-03-15T19:51:36.299041Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=366 cells_after_filter=298 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.9 total_ms=6.5
|
||||||
|
2026-03-15T19:54:34.842844Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=102 filters=1 filters_raw="Listing status:Historical sale" ms=0.1
|
||||||
|
2026-03-15T19:54:35.089458Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=5.0
|
||||||
|
2026-03-15T19:54:36.130976Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=89194ad2ec7ffff resolution=9 total=102 returned=100 offset=0 filters=1 filters_raw="Listing status:Historical sale" ms=0.4
|
||||||
|
2026-03-15T19:54:38.679953Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
|
||||||
|
2026-03-15T19:54:38.832683Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=2.4
|
||||||
|
2026-03-15T19:55:05.573712Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=102 filters=1 filters_raw="Listing status:Historical sale" ms=0.1
|
||||||
|
2026-03-15T19:55:06.053141Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.4 total_ms=4.8
|
||||||
|
2026-03-15T19:55:06.289059Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ec7ffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
|
||||||
|
2026-03-15T19:55:06.464008Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.7 total_ms=2.4
|
||||||
|
2026-03-15T19:55:07.257113Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=329 cells_after_filter=275 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=3.6
|
||||||
|
2026-03-15T19:55:10.176114Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=20 filters=1 filters_raw="Listing status:For sale" ms=0.1
|
||||||
|
2026-03-15T19:55:10.449590Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.6
|
||||||
|
2026-03-15T19:55:11.581365Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=89194ad2ecfffff resolution=9 total=20 returned=20 offset=0 filters=1 filters_raw="Listing status:For sale" ms=0.3
|
||||||
|
2026-03-15T19:55:12.826310Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=1 filters=1 filters_raw="Listing status:For rent" ms=0.1
|
||||||
|
2026-03-15T19:55:13.296370Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=69 cells_after_filter=54 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For rent" travel_entries=0 agg_ms=0.4 total_ms=0.8
|
||||||
|
2026-03-15T19:55:14.274431Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=20 filters=1 filters_raw="Listing status:For sale" ms=0.2
|
||||||
|
2026-03-15T19:55:14.745705Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.6
|
||||||
|
2026-03-15T19:55:15.410862Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.7 total_ms=2.6
|
||||||
|
2026-03-15T19:55:15.973540Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=285 filters=1 filters_raw="Listing status:Historical sale" ms=0.2
|
||||||
|
2026-03-15T19:55:16.149036Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=5.2
|
||||||
|
2026-03-15T19:55:50.335806Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=366 cells_after_filter=298 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.8 total_ms=6.5
|
||||||
|
2026-03-15T19:55:51.579153Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2e53ffff resolution=9 total_count=119 filters=1 filters_raw="Listing status:Historical sale" ms=0.1
|
||||||
|
2026-03-15T19:55:51.847817Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=295 cells_after_filter=209 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:Historical sale" travel_entries=0 agg_ms=1.6 total_ms=4.8
|
||||||
|
2026-03-15T19:57:08.597832Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=5 rows=14962301 parallel=true cells_before_filter=687 cells_after_filter=687 truncated=false bounds=46.0000,-12.0000,56.5000,12.0000 filters=0 filters_raw="-" travel_entries=0 agg_ms=206.2 total_ms=218.4
|
||||||
|
2026-03-15T19:58:56.459660Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2e53ffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
|
||||||
|
2026-03-15T19:58:56.611313Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.8
|
||||||
|
2026-03-15T19:58:58.208823Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=63016 parallel=true cells_before_filter=329 cells_after_filter=275 truncated=false bounds=51.4463,-0.0284,51.4875,0.0526 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=1.0 total_ms=4.1
|
||||||
|
2026-03-15T19:58:59.418179Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2e5bffff resolution=9 total_count=5 filters=1 filters_raw="Listing status:For sale" ms=0.1
|
||||||
|
2026-03-15T19:58:59.646741Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=51710 parallel=true cells_before_filter=263 cells_after_filter=194 truncated=false bounds=51.4463,-0.0153,51.4875,0.0394 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.7 total_ms=2.5
|
||||||
|
2026-03-15T19:59:13.261566Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad2ecfffff resolution=9 total_count=20 filters=1 filters_raw="Listing status:For sale" ms=0.1
|
||||||
|
2026-03-15T19:59:16.381213Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad216fffff resolution=9 total_count=2 filters=1 filters_raw="Listing status:For sale" ms=0.1
|
||||||
|
2026-03-15T19:59:19.468079Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=65337 parallel=true cells_before_filter=263 cells_after_filter=187 truncated=false bounds=51.4575,-0.0360,51.4986,0.0187 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.8 total_ms=2.8
|
||||||
|
2026-03-15T19:59:20.912780Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=57987 parallel=true cells_before_filter=230 cells_after_filter=192 truncated=false bounds=51.4560,-0.0422,51.4972,0.0126 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=2.4
|
||||||
|
2026-03-15T19:59:22.439657Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=45 filters=1 filters_raw="Listing status:For sale" ms=0.2
|
||||||
|
2026-03-15T19:59:25.975325Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=89194ad04a7ffff resolution=9 total=45 returned=45 offset=0 filters=1 filters_raw="Listing status:For sale" ms=0.4
|
||||||
|
2026-03-15T19:59:44.130233Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=27673 parallel=false cells_before_filter=124 cells_after_filter=79 truncated=false bounds=51.4630,-0.0335,51.4874,-0.0010 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=1.4
|
||||||
|
2026-03-15T19:59:44.863513Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=23785 parallel=false cells_before_filter=105 cells_after_filter=82 truncated=false bounds=51.4612,-0.0378,51.4857,-0.0053 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.6 total_ms=1.4
|
||||||
|
2026-03-15T19:59:45.429953Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=10363 parallel=false cells_before_filter=39 cells_after_filter=27 truncated=false bounds=51.4668,-0.0300,51.4794,-0.0133 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=0.2 total_ms=0.6
|
||||||
|
2026-03-15T19:59:47.052444Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=69 postcodes_after_filter=22 filtered_out=47 truncated=false bounds=51.469424,-0.026465,51.476579,-0.016948 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.8
|
||||||
|
2026-03-15T19:59:48.970011Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=8 filtered_out=26 truncated=false bounds=51.470330,-0.025601,51.476006,-0.018052 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.5
|
||||||
|
2026-03-15T19:59:49.463765Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=4 filtered_out=30 truncated=false bounds=51.471351,-0.024627,51.475359,-0.019295 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.3
|
||||||
|
2026-03-15T19:59:51.056899Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=9 filtered_out=25 truncated=false bounds=51.471780,-0.026331,51.475789,-0.020998 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.4
|
||||||
|
2026-03-15T19:59:51.581180Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=6 filtered_out=28 truncated=false bounds=51.471674,-0.025659,51.475683,-0.020327 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.4
|
||||||
|
2026-03-15T19:59:58.001176Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=34 postcodes_after_filter=8 filtered_out=26 truncated=false bounds=51.471124,-0.025948,51.476103,-0.019326 filters=1 filters_raw="Listing status:For sale" travel_entries=0 total_ms=0.6
|
||||||
|
2026-03-15T20:00:01.147529Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=44 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold|Leasehold" ms=0.2
|
||||||
|
2026-03-15T20:00:01.649333Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=33 postcodes_after_filter=7 filtered_out=26 truncated=false bounds=51.471124,-0.025948,51.476103,-0.019326 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold|Leasehold" travel_entries=0 total_ms=0.3
|
||||||
|
2026-03-15T20:00:03.524035Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=1 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" ms=0.2
|
||||||
|
2026-03-15T20:00:03.982950Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=5 postcodes_after_filter=1 filtered_out=4 truncated=false bounds=51.471124,-0.025948,51.476103,-0.019326 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 total_ms=0.2
|
||||||
|
2026-03-15T20:00:05.734272Z INFO property_map_server::routes::postcodes: GET /api/postcodes postcodes_before_filter=16 postcodes_after_filter=10 filtered_out=6 truncated=false bounds=51.468325,-0.029578,51.477865,-0.016889 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 total_ms=0.4
|
||||||
|
2026-03-15T20:00:07.598747Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=371660 parallel=true cells_before_filter=1032 cells_after_filter=874 truncated=false bounds=51.4033,-0.1135,51.5201,0.0418 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.3 total_ms=9.7
|
||||||
|
2026-03-15T20:00:07.902439Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=277117 parallel=true cells_before_filter=961 cells_after_filter=829 truncated=false bounds=51.4051,-0.0436,51.5218,0.1117 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.0 total_ms=9.7
|
||||||
|
2026-03-15T20:00:08.868329Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=410619 parallel=true cells_before_filter=1446 cells_after_filter=1288 truncated=false bounds=51.3867,-0.0636,51.5340,0.1322 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.5 total_ms=14.6
|
||||||
|
2026-03-15T20:00:10.739310Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=38 filters=2 filters_raw="Listing status:Historical sale;;Leasehold/Freehold:Freehold" ms=0.2
|
||||||
|
2026-03-15T20:00:11.236022Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=410619 parallel=true cells_before_filter=2260 cells_after_filter=1984 truncated=false bounds=51.3867,-0.0636,51.5340,0.1322 filters=2 filters_raw="Listing status:Historical sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=5.3 total_ms=38.1
|
||||||
|
2026-03-15T20:00:12.173963Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=1 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" ms=0.2
|
||||||
|
2026-03-15T20:00:12.666440Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=410619 parallel=true cells_before_filter=1446 cells_after_filter=1288 truncated=false bounds=51.3867,-0.0636,51.5340,0.1322 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=2.6 total_ms=15.7
|
||||||
|
2026-03-15T20:00:19.409064Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=89194ad04a7ffff resolution=9 total_count=0 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" ms=0.2
|
||||||
|
2026-03-15T20:00:19.968653Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=2.2 total_ms=2.2
|
||||||
|
2026-03-15T20:00:20.722189Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=1971 cells_after_filter=1801 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=3.7 total_ms=6.2
|
||||||
|
2026-03-15T20:00:21.190329Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=2.3 total_ms=2.3
|
||||||
|
2026-03-15T20:00:27.942711Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=571405 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.3867,-0.1106,51.5340,0.1792 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:9200" travel_entries=0 agg_ms=2.1 total_ms=2.1
|
||||||
|
2026-03-15T20:00:29.461340Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:9200" travel_entries=0 agg_ms=1.9 total_ms=1.9
|
||||||
|
2026-03-15T20:00:31.709909Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:9200" travel_entries=0 agg_ms=2.1 total_ms=2.1
|
||||||
|
2026-03-15T20:00:32.512895Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=1836 cells_after_filter=1678 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=3.2 total_ms=5.6
|
||||||
|
2026-03-15T20:00:33.940425Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=525321 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.4145,-0.0678,51.5616,0.2220 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=1.6 total_ms=1.6
|
||||||
|
2026-03-15T20:00:34.849329Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3141817 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.1389,-0.4994,51.7648,0.7321 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=14.9 total_ms=14.9
|
||||||
|
2026-03-15T20:00:35.804047Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3560390 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.0590,-0.6228,51.8363,0.9068 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=9.3 total_ms=9.3
|
||||||
|
2026-03-15T20:00:36.131775Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3610110 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.1661,-0.6093,51.9416,0.9202 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1700" travel_entries=0 agg_ms=9.3 total_ms=9.3
|
||||||
|
2026-03-15T20:00:37.313585Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3610110 parallel=true cells_before_filter=1788 cells_after_filter=1780 truncated=false bounds=51.1661,-0.6093,51.9416,0.9202 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=11.8 total_ms=14.2
|
||||||
|
2026-03-15T20:00:40.426451Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3610110 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.1661,-0.6093,51.9416,0.9202 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=10.8 total_ms=10.8
|
||||||
|
2026-03-15T20:00:42.361059Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=9.4 total_ms=9.4
|
||||||
|
2026-03-15T20:00:43.409927Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1193 cells_after_filter=1182 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=10.5 total_ms=12.0
|
||||||
|
2026-03-15T20:00:46.333117Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=8.8 total_ms=8.8
|
||||||
|
2026-03-15T20:00:48.082494Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=8.4 total_ms=8.4
|
||||||
|
2026-03-15T20:00:49.833842Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1193 cells_after_filter=1182 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=10.1 total_ms=23.3
|
||||||
|
2026-03-15T20:01:02.011997Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1094 cells_after_filter=1088 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=11.0 total_ms=21.9
|
||||||
|
2026-03-15T20:01:03.418647Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1193 cells_after_filter=1182 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=2 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold" travel_entries=0 agg_ms=11.4 total_ms=13.1
|
||||||
|
2026-03-15T20:01:03.571497Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1094 cells_after_filter=1088 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=11.2 total_ms=12.9
|
||||||
|
2026-03-15T20:01:08.874337Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1054 cells_after_filter=1048 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204" travel_entries=0 agg_ms=12.2 total_ms=23.6
|
||||||
|
2026-03-15T20:01:14.467687Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.8 total_ms=9.8
|
||||||
|
2026-03-15T20:01:15.872578Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=1054 cells_after_filter=1048 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204" travel_entries=0 agg_ms=12.0 total_ms=13.6
|
||||||
|
2026-03-15T20:01:16.342009Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=10.0 total_ms=10.0
|
||||||
|
2026-03-15T20:01:18.114175Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4515,51.8453,0.7617 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=10.2 total_ms=10.2
|
||||||
|
2026-03-15T20:01:40.296993Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3094495 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2300,-0.4517,51.8453,0.7619 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.8 total_ms=9.8
|
||||||
|
2026-03-15T20:02:48.056601Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3111647 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2295,-0.4528,51.8458,0.7630 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.6 total_ms=9.6
|
||||||
|
2026-03-15T20:02:49.785245Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Est. price per sqm:20:918535" travel_entries=0 agg_ms=9.4 total_ms=9.4
|
||||||
|
2026-03-15T20:03:02.786058Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=1285 cells_after_filter=1278 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204" travel_entries=0 agg_ms=13.1 total_ms=26.2
|
||||||
|
2026-03-15T20:03:23.742245Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=4 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Total floor area (sqm):0.1:204;;Price per sqm:5:1519231" travel_entries=0 agg_ms=10.0 total_ms=10.0
|
||||||
|
2026-03-15T20:03:30.904544Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=3 filters_raw="Listing status:For sale;;Leasehold/Freehold:Freehold;;Price per sqm:5:1519231" travel_entries=0 agg_ms=9.0 total_ms=9.0
|
||||||
|
2026-03-15T20:03:31.700115Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:5:1519231" travel_entries=0 agg_ms=9.5 total_ms=9.5
|
||||||
|
2026-03-15T20:03:33.123759Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:5:1519231" travel_entries=0 agg_ms=10.7 total_ms=10.7
|
||||||
|
2026-03-15T20:03:33.264092Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:5:2500" travel_entries=0 agg_ms=8.3 total_ms=8.3
|
||||||
|
2026-03-15T20:03:34.235435Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=1479 cells_after_filter=1465 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=10.7 total_ms=12.9
|
||||||
|
2026-03-15T20:03:34.567324Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=9.0 total_ms=9.0
|
||||||
|
2026-03-15T20:03:36.096362Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=3349513 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=51.2195,-0.5956,51.8557,0.9057 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=7.6 total_ms=7.6
|
||||||
|
2026-03-15T20:03:37.751074Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=7 rows=4814137 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=50.9471,-0.9801,52.1136,1.7728 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=11.8 total_ms=11.8
|
||||||
|
2026-03-15T20:03:38.835277Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=56.1 total_ms=56.1
|
||||||
|
2026-03-15T20:03:40.887729Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=49.8 total_ms=49.8
|
||||||
|
2026-03-15T20:03:42.529302Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3274 cells_after_filter=3270 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:Historical sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=84.4 total_ms=146.3
|
||||||
|
2026-03-15T20:03:44.390098Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=0 cells_after_filter=0 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Price per sqm:1400:2500" travel_entries=0 agg_ms=54.6 total_ms=54.6
|
||||||
|
2026-03-15T20:04:33.371452Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:04:33.371601Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:04:33.371608Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:04:33.457625Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:04:33.457635Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:04:35.974733Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:04:35.974742Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:04:36.398745Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:04:36.398757Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:04:36.476688Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:04:36.476699Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:04:46.252075Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:04:46.252184Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
|
||||||
|
2026-03-15T20:04:47.610246Z INFO property_map_server::data::property: Combined data selected rows=15702691
|
||||||
|
2026-03-15T20:04:47.804418Z INFO property_map_server::data::property: Extracting numeric feature columns
|
||||||
|
2026-03-15T20:04:48.220314Z INFO property_map_server::data::property: Computing histograms for numeric features
|
||||||
|
2026-03-15T20:04:49.335558Z INFO property_map_server::data::property: Extracting string columns
|
||||||
|
2026-03-15T20:04:51.577686Z INFO property_map_server::data::property: Building enum features
|
||||||
|
2026-03-15T20:04:53.025870Z INFO property_map_server::data::property: Extracting renovation history
|
||||||
|
2026-03-15T20:04:55.151481Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
|
||||||
|
2026-03-15T20:04:55.151490Z INFO property_map_server::data::property: Extracting listing features
|
||||||
|
2026-03-15T20:04:55.671708Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
|
||||||
|
2026-03-15T20:04:55.671716Z INFO property_map_server::data::property: Sorting rows by spatial locality
|
||||||
|
2026-03-15T20:04:56.573495Z INFO property_map_server::data::property: Building interned strings
|
||||||
|
2026-03-15T20:05:02.540797Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
|
||||||
|
2026-03-15T20:05:05.103322Z INFO property_map_server::data::property: Data loading complete
|
||||||
|
2026-03-15T20:05:06.794645Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
|
||||||
|
2026-03-15T20:05:06.794655Z INFO property_map_server: Building spatial grid index (0.01° cells)
|
||||||
|
2026-03-15T20:05:06.892926Z INFO property_map_server: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T20:05:06.892936Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T20:05:07.310766Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
|
||||||
|
2026-03-15T20:05:07.310799Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
|
||||||
|
2026-03-15T20:05:07.310815Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
|
||||||
|
2026-03-15T20:05:07.355286Z INFO property_map_server::data::poi: Loaded 678242 POIs
|
||||||
|
2026-03-15T20:05:07.468648Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
|
||||||
|
2026-03-15T20:05:07.469230Z INFO property_map_server::data::poi: POI data loading complete.
|
||||||
|
2026-03-15T20:05:07.506618Z INFO property_map_server: POI data loaded pois=678242
|
||||||
|
2026-03-15T20:05:07.506627Z INFO property_map_server: Building POI spatial grid index
|
||||||
|
2026-03-15T20:05:07.512084Z INFO property_map_server: Loading place data from /app/data/places.parquet
|
||||||
|
2026-03-15T20:05:07.512091Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
|
||||||
|
2026-03-15T20:05:07.525343Z INFO property_map_server::data::places: Loaded 3474 places
|
||||||
|
2026-03-15T20:05:07.526115Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
|
||||||
|
2026-03-15T20:05:07.526170Z INFO property_map_server: Place data loaded places=3474
|
||||||
|
2026-03-15T20:05:07.526181Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
|
||||||
|
2026-03-15T20:05:07.526186Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
|
||||||
|
2026-03-15T20:05:07.536262Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
|
||||||
|
2026-03-15T20:05:15.228511Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
|
||||||
|
2026-03-15T20:05:15.228521Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
|
||||||
|
2026-03-15T20:05:15.228532Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
|
||||||
|
2026-03-15T20:05:15.231530Z INFO property_map_server: PMTiles loaded successfully
|
||||||
|
2026-03-15T20:05:15.273794Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
|
||||||
|
2026-03-15T20:05:15.309107Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
|
||||||
|
2026-03-15T20:05:15.309278Z INFO property_map_server: Precomputed features response groups=9
|
||||||
|
2026-03-15T20:05:15.309292Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
|
||||||
|
2026-03-15T20:05:15.446941Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
|
||||||
|
2026-03-15T20:05:15.454051Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
|
||||||
|
2026-03-15T20:05:15.459669Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
|
||||||
|
2026-03-15T20:05:15.591403Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
|
||||||
|
2026-03-15T20:05:15.597637Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
|
||||||
|
2026-03-15T20:05:15.597658Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
|
||||||
|
2026-03-15T20:05:15.597670Z INFO property_map_server: Loading travel time data from /app/data/travel-times
|
||||||
|
2026-03-15T20:05:15.643613Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
|
||||||
|
2026-03-15T20:05:15.644670Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
|
||||||
|
2026-03-15T20:05:15.646043Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
|
||||||
|
2026-03-15T20:05:15.653830Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
|
||||||
|
2026-03-15T20:05:15.653847Z INFO property_map_server: Travel time store loaded modes=4
|
||||||
|
2026-03-15T20:05:15.653891Z INFO property_map_server: Precomputed AI filters system prompt
|
||||||
|
2026-03-15T20:05:19.156377Z INFO property_map_server: All memory pages locked (mlockall)
|
||||||
|
2026-03-15T20:05:19.156422Z INFO property_map_server: Server listening on 0.0.0.0:8001
|
||||||
|
2026-03-15T20:05:27.513039Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:05:27.517384Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:05:27.524994Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:05:27.531774Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:05:27.537007Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:05:27.542722Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:05:27.549237Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:05:27.563523Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:05:28.096739Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:05:28.096753Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:06:11.902556Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=55.7 total_ms=84.8
|
||||||
|
2026-03-15T20:08:11.734568Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=56.9 total_ms=83.5
|
||||||
|
2026-03-15T20:08:12.991872Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=70.2 total_ms=76.3
|
||||||
|
2026-03-15T20:08:13.507957Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=59.5 total_ms=63.9
|
||||||
|
2026-03-15T20:08:19.338486Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=74.5 total_ms=102.6
|
||||||
|
2026-03-15T20:08:20.686317Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=55.0 total_ms=59.5
|
||||||
|
2026-03-15T20:08:20.922174Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=60.6 total_ms=65.2
|
||||||
|
2026-03-15T20:08:26.772910Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3082 cells_after_filter=3081 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:788" travel_entries=0 agg_ms=62.6 total_ms=89.4
|
||||||
|
2026-03-15T20:08:28.805969Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=59.6 total_ms=64.4
|
||||||
|
2026-03-15T20:08:28.952002Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3082 cells_after_filter=3081 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:788" travel_entries=0 agg_ms=57.7 total_ms=62.3
|
||||||
|
2026-03-15T20:08:30.558244Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=58.2 total_ms=84.6
|
||||||
|
2026-03-15T20:08:33.038408Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3248 cells_after_filter=3246 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=56.6 total_ms=61.3
|
||||||
|
2026-03-15T20:08:33.304924Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=3113 cells_after_filter=3112 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):0.1:8855327" travel_entries=0 agg_ms=59.1 total_ms=63.7
|
||||||
|
2026-03-15T20:08:34.406141Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=13490411 parallel=true cells_before_filter=1106 cells_after_filter=1106 truncated=false bounds=49.0536,-4.4123,53.8271,6.8427 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=48.2 total_ms=55.7
|
||||||
|
2026-03-15T20:08:36.292323Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=6 rows=9738472 parallel=true cells_before_filter=765 cells_after_filter=765 truncated=false bounds=49.7628,-3.2615,52.9110,4.1398 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=36.2 total_ms=41.7
|
||||||
|
2026-03-15T20:08:39.628619Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=8 rows=2480095 parallel=true cells_before_filter=165 cells_after_filter=164 truncated=false bounds=51.3348,-0.4935,51.7042,0.3778 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=5.9 total_ms=7.2
|
||||||
|
2026-03-15T20:08:41.653852Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=18 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" ms=0.1
|
||||||
|
2026-03-15T20:08:41.915336Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=8 rows=2176649 parallel=true cells_before_filter=139 cells_after_filter=139 truncated=false bounds=51.3348,-0.3793,51.7042,0.2636 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=5.4 total_ms=6.5
|
||||||
|
2026-03-15T20:08:44.824263Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=40 cells_after_filter=37 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=1.1 total_ms=1.4
|
||||||
|
2026-03-15T20:08:45.089912Z INFO property_map_server::routes::properties: GET /api/hexagon-properties h3=88195da457fffff resolution=8 total=18 returned=18 offset=0 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" ms=0.2
|
||||||
|
2026-03-15T20:09:25.568766Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.1 total_ms=3.8
|
||||||
|
2026-03-15T20:09:26.074762Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=40 cells_after_filter=37 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):833:8855327" travel_entries=0 agg_ms=1.2 total_ms=1.3
|
||||||
|
2026-03-15T20:09:28.666758Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=53 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:8855327" ms=0.1
|
||||||
|
2026-03-15T20:09:29.155687Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1096 cells_after_filter=963 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:8855327" travel_entries=0 agg_ms=2.6 total_ms=12.1
|
||||||
|
2026-03-15T20:09:29.978092Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.1 total_ms=3.8
|
||||||
|
2026-03-15T20:09:30.496935Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1096 cells_after_filter=963 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:8855327" travel_entries=0 agg_ms=2.1 total_ms=3.7
|
||||||
|
2026-03-15T20:09:33.901621Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" ms=0.1
|
||||||
|
2026-03-15T20:09:34.385794Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" travel_entries=0 agg_ms=2.4 total_ms=10.3
|
||||||
|
2026-03-15T20:09:35.102671Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.2 total_ms=3.9
|
||||||
|
2026-03-15T20:09:35.587723Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" travel_entries=0 agg_ms=2.2 total_ms=3.7
|
||||||
|
2026-03-15T20:09:36.148540Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):98:204" ms=0.1
|
||||||
|
2026-03-15T20:09:36.644673Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):98:204" travel_entries=0 agg_ms=2.5 total_ms=10.4
|
||||||
|
2026-03-15T20:09:37.160139Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.2 total_ms=3.9
|
||||||
|
2026-03-15T20:09:37.683238Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):98:204" travel_entries=0 agg_ms=2.2 total_ms=3.8
|
||||||
|
2026-03-15T20:09:37.944776Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):89:204" ms=0.1
|
||||||
|
2026-03-15T20:09:37.969098Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):89:204" travel_entries=0 agg_ms=2.2 total_ms=10.4
|
||||||
|
2026-03-15T20:09:42.074237Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1197 cells_after_filter=1045 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=1 filters_raw="Listing status:For sale" travel_entries=0 agg_ms=2.1 total_ms=4.2
|
||||||
|
2026-03-15T20:09:42.532178Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):89:204" travel_entries=0 agg_ms=2.4 total_ms=4.0
|
||||||
|
2026-03-15T20:09:50.016920Z INFO property_map_server::routes::hexagon_stats: GET /api/hexagon-stats h3=88195da457fffff resolution=8 total_count=19 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" ms=0.1
|
||||||
|
2026-03-15T20:09:50.508364Z INFO property_map_server::routes::hexagons: GET /api/hexagons resolution=9 rows=308665 parallel=true cells_before_filter=1081 cells_after_filter=949 truncated=false bounds=51.5155,-0.2104,51.6023,-0.0591 filters=2 filters_raw="Listing status:For sale;;Total floor area (sqm):106:204" travel_entries=0 agg_ms=2.2 total_ms=10.2
|
||||||
|
2026-03-15T20:10:13.683691Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:10:13.683848Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:10:13.683854Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:10:13.750258Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:10:13.750268Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:10:16.179096Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:10:16.179106Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:10:16.456525Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:10:16.456537Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:10:16.514061Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:10:16.514070Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:10:22.681306Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:10:22.681402Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
|
||||||
|
2026-03-15T20:10:24.012054Z INFO property_map_server::data::property: Combined data selected rows=15702691
|
||||||
|
2026-03-15T20:10:24.180353Z INFO property_map_server::data::property: Extracting numeric feature columns
|
||||||
|
2026-03-15T20:10:24.607077Z INFO property_map_server::data::property: Computing histograms for numeric features
|
||||||
|
2026-03-15T20:10:25.773925Z INFO property_map_server::data::property: Extracting string columns
|
||||||
|
2026-03-15T20:10:27.857482Z INFO property_map_server::data::property: Building enum features
|
||||||
|
2026-03-15T20:10:29.227608Z INFO property_map_server::data::property: Extracting renovation history
|
||||||
|
2026-03-15T20:10:31.336600Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
|
||||||
|
2026-03-15T20:10:31.336609Z INFO property_map_server::data::property: Extracting listing features
|
||||||
|
2026-03-15T20:10:31.843715Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
|
||||||
|
2026-03-15T20:10:31.843723Z INFO property_map_server::data::property: Sorting rows by spatial locality
|
||||||
|
2026-03-15T20:10:32.766778Z INFO property_map_server::data::property: Building interned strings
|
||||||
|
2026-03-15T20:10:38.715142Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
|
||||||
|
2026-03-15T20:10:41.203246Z INFO property_map_server::data::property: Data loading complete
|
||||||
|
2026-03-15T20:10:42.829684Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
|
||||||
|
2026-03-15T20:10:42.829695Z INFO property_map_server: Building spatial grid index (0.01° cells)
|
||||||
|
2026-03-15T20:10:42.925550Z INFO property_map_server: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T20:10:42.925560Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T20:10:43.323292Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
|
||||||
|
2026-03-15T20:10:43.323313Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
|
||||||
|
2026-03-15T20:10:43.323319Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
|
||||||
|
2026-03-15T20:10:43.341356Z INFO property_map_server::data::poi: Loaded 678242 POIs
|
||||||
|
2026-03-15T20:10:43.453718Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
|
||||||
|
2026-03-15T20:10:43.454297Z INFO property_map_server::data::poi: POI data loading complete.
|
||||||
|
2026-03-15T20:10:43.490938Z INFO property_map_server: POI data loaded pois=678242
|
||||||
|
2026-03-15T20:10:43.490947Z INFO property_map_server: Building POI spatial grid index
|
||||||
|
2026-03-15T20:10:43.496143Z INFO property_map_server: Loading place data from /app/data/places.parquet
|
||||||
|
2026-03-15T20:10:43.496149Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
|
||||||
|
2026-03-15T20:10:43.496678Z INFO property_map_server::data::places: Loaded 3474 places
|
||||||
|
2026-03-15T20:10:43.497419Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
|
||||||
|
2026-03-15T20:10:43.497476Z INFO property_map_server: Place data loaded places=3474
|
||||||
|
2026-03-15T20:10:43.497486Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
|
||||||
|
2026-03-15T20:10:43.497491Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
|
||||||
|
2026-03-15T20:10:43.498313Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
|
||||||
|
2026-03-15T20:10:50.089457Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
|
||||||
|
2026-03-15T20:10:50.089468Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
|
||||||
|
2026-03-15T20:10:50.089483Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
|
||||||
|
2026-03-15T20:10:50.089693Z INFO property_map_server: PMTiles loaded successfully
|
||||||
|
2026-03-15T20:10:50.134239Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
|
||||||
|
2026-03-15T20:10:50.160720Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
|
||||||
|
2026-03-15T20:10:50.160908Z INFO property_map_server: Precomputed features response groups=9
|
||||||
|
2026-03-15T20:10:50.160921Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
|
||||||
|
2026-03-15T20:10:50.220618Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
|
||||||
|
2026-03-15T20:10:50.224719Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
|
||||||
|
2026-03-15T20:10:50.247220Z INFO property_map_server::pocketbase: Added notes text field to PocketBase collection 'saved_searches'
|
||||||
|
2026-03-15T20:10:50.251061Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
|
||||||
|
2026-03-15T20:10:50.257749Z INFO property_map_server::pocketbase: Added notes text field to PocketBase collection 'saved_properties'
|
||||||
|
2026-03-15T20:10:50.304219Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
|
||||||
|
2026-03-15T20:10:50.308723Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
|
||||||
|
2026-03-15T20:10:50.308749Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
|
||||||
|
2026-03-15T20:10:50.308761Z INFO property_map_server: Loading travel time data from /app/data/travel-times
|
||||||
|
2026-03-15T20:10:50.314963Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
|
||||||
|
2026-03-15T20:10:50.315108Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
|
||||||
|
2026-03-15T20:10:50.315266Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
|
||||||
|
2026-03-15T20:10:50.316776Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
|
||||||
|
2026-03-15T20:10:50.316796Z INFO property_map_server: Travel time store loaded modes=4
|
||||||
|
2026-03-15T20:10:50.316843Z INFO property_map_server: Precomputed AI filters system prompt
|
||||||
|
2026-03-15T20:10:51.706624Z INFO property_map_server: All memory pages locked (mlockall)
|
||||||
|
2026-03-15T20:10:51.706663Z INFO property_map_server: Server listening on 0.0.0.0:8001
|
||||||
|
2026-03-15T20:10:52.074357Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:10:52.074443Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:10:52.507461Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:10:52.508607Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:10:52.516615Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:10:52.522899Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:10:52.536710Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:10:52.541257Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:10:52.549244Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:10:52.550031Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:18:43.657651Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:18:43.657816Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:18:43.657822Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:18:43.746197Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:18:43.746208Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:18:46.373581Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:18:46.373592Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:18:46.679114Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:18:46.679124Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:18:46.747208Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:18:46.747221Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:19:00.212275Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:19:00.212447Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:19:00.212456Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:19:00.289849Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:19:00.289859Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:19:02.760385Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:19:02.760396Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:19:03.052345Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:19:03.052355Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:19:03.116051Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:19:03.116060Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:19:09.885025Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:19:09.885115Z INFO property_map_server::data::property: Feature columns from config numeric=54 enums=13 total=67
|
||||||
|
2026-03-15T20:19:11.176403Z INFO property_map_server::data::property: Combined data selected rows=15702691
|
||||||
|
2026-03-15T20:19:11.363917Z INFO property_map_server::data::property: Extracting numeric feature columns
|
||||||
|
2026-03-15T20:19:11.771906Z INFO property_map_server::data::property: Computing histograms for numeric features
|
||||||
|
2026-03-15T20:19:13.025850Z INFO property_map_server::data::property: Extracting string columns
|
||||||
|
2026-03-15T20:19:15.244059Z INFO property_map_server::data::property: Building enum features
|
||||||
|
2026-03-15T20:19:16.598869Z INFO property_map_server::data::property: Extracting renovation history
|
||||||
|
2026-03-15T20:19:18.813936Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
|
||||||
|
2026-03-15T20:19:18.813945Z INFO property_map_server::data::property: Extracting listing features
|
||||||
|
2026-03-15T20:19:19.353132Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
|
||||||
|
2026-03-15T20:19:19.353141Z INFO property_map_server::data::property: Sorting rows by spatial locality
|
||||||
|
2026-03-15T20:19:20.231462Z INFO property_map_server::data::property: Building interned strings
|
||||||
|
2026-03-15T20:19:26.257466Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
|
||||||
|
2026-03-15T20:19:28.766985Z INFO property_map_server::data::property: Data loading complete
|
||||||
|
2026-03-15T20:19:30.388072Z INFO property_map_server: Property data loaded rows=15702691 features=67 enums=13
|
||||||
|
2026-03-15T20:19:30.388081Z INFO property_map_server: Building spatial grid index (0.01° cells)
|
||||||
|
2026-03-15T20:19:30.483869Z INFO property_map_server: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T20:19:30.483878Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T20:19:30.866157Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
|
||||||
|
2026-03-15T20:19:30.866199Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
|
||||||
|
2026-03-15T20:19:30.866214Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
|
||||||
|
2026-03-15T20:19:30.902414Z INFO property_map_server::data::poi: Loaded 678242 POIs
|
||||||
|
2026-03-15T20:19:31.015549Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
|
||||||
|
2026-03-15T20:19:31.016109Z INFO property_map_server::data::poi: POI data loading complete.
|
||||||
|
2026-03-15T20:19:31.054466Z INFO property_map_server: POI data loaded pois=678242
|
||||||
|
2026-03-15T20:19:31.054474Z INFO property_map_server: Building POI spatial grid index
|
||||||
|
2026-03-15T20:19:31.059717Z INFO property_map_server: Loading place data from /app/data/places.parquet
|
||||||
|
2026-03-15T20:19:31.059723Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
|
||||||
|
2026-03-15T20:19:31.069876Z INFO property_map_server::data::places: Loaded 3474 places
|
||||||
|
2026-03-15T20:19:31.070582Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
|
||||||
|
2026-03-15T20:19:31.070632Z INFO property_map_server: Place data loaded places=3474
|
||||||
|
2026-03-15T20:19:31.070640Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
|
||||||
|
2026-03-15T20:19:31.070644Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
|
||||||
|
2026-03-15T20:19:31.079677Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
|
||||||
|
2026-03-15T20:19:39.095251Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
|
||||||
|
2026-03-15T20:19:39.095261Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
|
||||||
|
2026-03-15T20:19:39.095277Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
|
||||||
|
2026-03-15T20:19:39.185235Z INFO property_map_server: PMTiles loaded successfully
|
||||||
|
2026-03-15T20:19:39.228620Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
|
||||||
|
2026-03-15T20:19:39.315674Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
|
||||||
|
2026-03-15T20:19:39.315892Z INFO property_map_server: Precomputed features response groups=9
|
||||||
|
2026-03-15T20:19:39.315908Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
|
||||||
|
2026-03-15T20:19:39.370690Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
|
||||||
|
2026-03-15T20:19:39.373624Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
|
||||||
|
2026-03-15T20:19:39.378395Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
|
||||||
|
2026-03-15T20:19:39.427367Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
|
||||||
|
2026-03-15T20:19:39.430988Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
|
||||||
|
2026-03-15T20:19:39.431004Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
|
||||||
|
2026-03-15T20:19:39.431017Z INFO property_map_server: Loading travel time data from /app/data/travel-times
|
||||||
|
2026-03-15T20:19:39.437636Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
|
||||||
|
2026-03-15T20:19:39.437807Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
|
||||||
|
2026-03-15T20:19:39.437966Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
|
||||||
|
2026-03-15T20:19:39.439692Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
|
||||||
|
2026-03-15T20:19:39.439715Z INFO property_map_server: Travel time store loaded modes=4
|
||||||
|
2026-03-15T20:19:39.439777Z INFO property_map_server: Precomputed AI filters system prompt
|
||||||
|
2026-03-15T20:19:42.954025Z INFO property_map_server: All memory pages locked (mlockall)
|
||||||
|
2026-03-15T20:19:42.954067Z INFO property_map_server: Server listening on 0.0.0.0:8001
|
||||||
|
2026-03-15T20:19:43.261880Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:19:43.263169Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:43.749947Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:43.751657Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:44.102549Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:44.111787Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:44.529068Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:44.529991Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:44.546765Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:44.551558Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:44.558857Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:44.565720Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:44.574121Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:44.577816Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:50.493074Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:50.493085Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:50.514758Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:50.515163Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:50.526484Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:50.530275Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:50.535884Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:50.542453Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:50.551188Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:20:50.556666Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:51.102224Z INFO property_map_server::routes::features: GET /api/features
|
||||||
|
2026-03-15T20:20:51.110261Z INFO property_map_server::routes::pois: GET /api/poi-categories count=74 groups=11
|
||||||
|
2026-03-15T20:21:45.349498Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:21:45.349655Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:21:45.349664Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:21:45.421388Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:21:45.421400Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:21:47.937781Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:21:47.937791Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:21:48.227331Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:21:48.227342Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:21:48.286224Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:21:48.286233Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:22:05.552980Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:31:37.445987Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:31:37.446148Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:31:37.446155Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:31:37.534049Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:31:37.534061Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:31:40.177015Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:31:40.177028Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:31:40.482618Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:31:40.482630Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:31:40.546018Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:31:40.546027Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:31:53.986877Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:31:53.987039Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:31:53.987045Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:31:54.063239Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:31:54.063248Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:31:56.648053Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:31:56.648065Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:31:56.965183Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:31:56.965194Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:31:57.027327Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:31:57.027342Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:31:59.916992Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:32:04.424692Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:32:04.424881Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:32:04.424890Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:32:04.493173Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:32:04.493184Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:32:07.324815Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:32:07.324827Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:32:07.628171Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:32:07.628182Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:32:07.697976Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:32:07.697987Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:32:09.987127Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:32:15.239857Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:32:15.240016Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:32:15.240027Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:32:15.312610Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:32:15.312619Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:32:17.889502Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:32:17.889512Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:32:18.181929Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:32:18.181939Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:32:18.245757Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:32:18.245767Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:32:23.084864Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:32:23.085017Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:32:23.085025Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:32:23.149174Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:32:23.149184Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:32:25.785485Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:32:25.785496Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:32:26.076631Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:32:26.076644Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:32:26.135954Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:32:26.135967Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:32:28.972888Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:42:17.088723Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:42:17.088899Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:42:17.088907Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:42:17.150999Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:42:17.151009Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:42:19.827707Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:42:19.827719Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:42:20.135500Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:42:20.135509Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:42:20.197192Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:42:20.197202Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:42:56.103982Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:42:56.104138Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:42:56.104143Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:42:56.204428Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:42:56.204439Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:42:58.832976Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:42:58.832987Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:42:59.132876Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:42:59.132886Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:42:59.192137Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:42:59.192148Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:43:26.892462Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T20:56:11.923543Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:56:11.923747Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:56:11.923760Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:56:12.004141Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:56:12.004153Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:56:18.202087Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:56:18.202098Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:56:18.603337Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:56:18.603351Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:56:18.676290Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:56:18.676299Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:56:51.555700Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T20:56:51.555882Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T20:56:51.555890Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T20:56:51.649380Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T20:56:51.649390Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T20:56:54.479400Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T20:56:54.479413Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T20:56:54.787050Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T20:56:54.787063Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T20:56:54.853848Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T20:56:54.853857Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T20:57:26.731834Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T21:00:33.407080Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T21:00:33.407232Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T21:00:33.407238Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T21:00:33.499072Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T21:00:33.499081Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T21:00:36.274914Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T21:00:36.274924Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T21:00:36.575562Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T21:00:36.575572Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T21:00:36.638808Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T21:00:36.638817Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T21:00:50.949722Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T21:03:11.209421Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T21:03:11.209606Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T21:03:11.209612Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T21:03:11.289498Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T21:03:11.289508Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T21:03:13.875043Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T21:03:13.875053Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T21:03:14.208971Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T21:03:14.208982Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T21:03:14.272084Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T21:03:14.272095Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T21:03:28.932912Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T21:03:28.933073Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T21:03:28.933082Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T21:03:29.004139Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T21:03:29.004150Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T21:03:31.538874Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T21:03:31.538884Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T21:03:31.819659Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T21:03:31.819669Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T21:03:31.879197Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T21:03:31.879206Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T21:03:46.003768Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T21:19:16.107508Z INFO property_map_server: Prometheus metrics initialized
|
||||||
|
2026-03-15T21:19:16.107694Z INFO property_map_server: Loading property data from /app/data/properties.parquet, /app/data/postcode.parquet, /app/data-scraped/online_listings_buy.parquet, /app/data-scraped/online_listings_rent.parquet
|
||||||
|
2026-03-15T21:19:16.107702Z INFO property_map_server::data::property: Loading postcode features from "/app/data/postcode.parquet"
|
||||||
|
2026-03-15T21:19:16.272588Z INFO property_map_server::data::property: Postcode features loaded rows=1262367
|
||||||
|
2026-03-15T21:19:16.272599Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
|
||||||
|
2026-03-15T21:19:22.944816Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203381
|
||||||
|
2026-03-15T21:19:22.944828Z INFO property_map_server::data::property: Loading buy listings from "/app/data-scraped/online_listings_buy.parquet"
|
||||||
|
2026-03-15T21:19:23.587534Z INFO property_map_server::data::property: buy listings joined rows=474965
|
||||||
|
2026-03-15T21:19:23.589329Z INFO property_map_server::data::property: Loading rent listings from "/app/data-scraped/online_listings_rent.parquet"
|
||||||
|
2026-03-15T21:19:23.673638Z INFO property_map_server::data::property: rent listings joined rows=24345
|
||||||
|
2026-03-15T21:19:23.673648Z INFO property_map_server::data::property: Concatenating all data sources
|
||||||
|
2026-03-15T21:20:10.134367Z INFO property_map_server::data::property: All data sources combined properties=15203381 buy_listings=474965 rent_listings=24345 total=15702691
|
||||||
|
2026-03-15T21:20:10.173078Z INFO property_map_server::data::property: Feature columns from config numeric=55 enums=13 total=68
|
||||||
|
2026-03-15T21:20:11.640174Z INFO property_map_server::data::property: Combined data selected rows=15702691
|
||||||
|
2026-03-15T21:20:11.825306Z INFO property_map_server::data::property: Extracting numeric feature columns
|
||||||
|
2026-03-15T21:20:12.283833Z INFO property_map_server::data::property: Computing histograms for numeric features
|
||||||
|
2026-03-15T21:20:13.735551Z INFO property_map_server::data::property: Extracting string columns
|
||||||
|
2026-03-15T21:20:16.091555Z INFO property_map_server::data::property: Building enum features
|
||||||
|
2026-03-15T21:20:17.505895Z INFO property_map_server::data::property: Extracting renovation history
|
||||||
|
2026-03-15T21:20:19.730770Z INFO property_map_server::data::property: Renovation history extracted properties_with_events=1829807
|
||||||
|
2026-03-15T21:20:19.730780Z INFO property_map_server::data::property: Extracting listing features
|
||||||
|
2026-03-15T21:20:20.299294Z INFO property_map_server::data::property: Listing features extracted properties_with_features=412749
|
||||||
|
2026-03-15T21:20:20.299302Z INFO property_map_server::data::property: Sorting rows by spatial locality
|
||||||
|
2026-03-15T21:20:21.284951Z INFO property_map_server::data::property: Building interned strings
|
||||||
|
2026-03-15T21:20:27.822185Z INFO property_map_server::data::property: Transposing to row-major layout (spatially sorted, quantized to u16)
|
||||||
|
2026-03-15T21:20:30.681373Z INFO property_map_server::data::property: Data loading complete
|
||||||
|
2026-03-15T21:20:32.249224Z INFO property_map_server: Property data loaded rows=15702691 features=68 enums=13
|
||||||
|
2026-03-15T21:20:32.249237Z INFO property_map_server: Building spatial grid index (0.01° cells)
|
||||||
|
2026-03-15T21:20:32.646329Z INFO property_map_server: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T21:20:32.646339Z INFO property_map_server::data::property: Precomputing H3 cells at resolution 12
|
||||||
|
2026-03-15T21:20:33.084366Z INFO property_map_server::data::property: H3 precomputation complete (15702691 cells)
|
||||||
|
2026-03-15T21:20:33.084392Z INFO property_map_server: Loading POI data from /app/data/filtered_uk_pois.parquet
|
||||||
|
2026-03-15T21:20:33.084431Z INFO property_map_server::data::poi: Loading POI data from "/app/data/filtered_uk_pois.parquet"...
|
||||||
|
2026-03-15T21:20:33.115170Z INFO property_map_server::data::poi: Loaded 678242 POIs
|
||||||
|
2026-03-15T21:20:33.231940Z INFO property_map_server::data::poi: POI string columns interned category_unique=74 group_unique=11 emoji_unique=71
|
||||||
|
2026-03-15T21:20:33.232512Z INFO property_map_server::data::poi: POI data loading complete.
|
||||||
|
2026-03-15T21:20:33.270242Z INFO property_map_server: POI data loaded pois=678242
|
||||||
|
2026-03-15T21:20:33.270252Z INFO property_map_server: Building POI spatial grid index
|
||||||
|
2026-03-15T21:20:33.275905Z INFO property_map_server: Loading place data from /app/data/places.parquet
|
||||||
|
2026-03-15T21:20:33.275913Z INFO property_map_server::data::places: Loading place data from "/app/data/places.parquet"...
|
||||||
|
2026-03-15T21:20:33.280404Z INFO property_map_server::data::places: Loaded 3474 places
|
||||||
|
2026-03-15T21:20:33.281434Z INFO property_map_server::data::places: Place data loaded places=3474 types=2 with_population=71 with_city=3392
|
||||||
|
2026-03-15T21:20:33.281491Z INFO property_map_server: Place data loaded places=3474
|
||||||
|
2026-03-15T21:20:33.281502Z INFO property_map_server: Loading postcode boundaries from /app/data/postcode_boundaries
|
||||||
|
2026-03-15T21:20:33.281509Z INFO property_map_server::data::postcodes: Loading postcode boundaries from "/app/data/postcode_boundaries"
|
||||||
|
2026-03-15T21:20:33.282358Z INFO property_map_server::data::postcodes: Found GeoJSON files to process files=2361
|
||||||
|
2026-03-15T21:20:41.788648Z INFO property_map_server::data::postcodes: Postcode boundary data ready postcodes=1490140
|
||||||
|
2026-03-15T21:20:42.014614Z INFO property_map_server: Postcode boundaries loaded postcodes=1490140
|
||||||
|
2026-03-15T21:20:42.014635Z INFO property_map_server: Loading PMTiles from /app/data/uk.pmtiles
|
||||||
|
2026-03-15T21:20:42.107058Z INFO property_map_server: PMTiles loaded successfully
|
||||||
|
2026-03-15T21:20:42.150975Z INFO property_map_server: No --dist provided; static serving and OG injection disabled
|
||||||
|
2026-03-15T21:20:42.241257Z INFO property_map_server: Screenshot service configured: http://screenshot:8002
|
||||||
|
2026-03-15T21:20:42.241801Z INFO property_map_server: Precomputed features response groups=8
|
||||||
|
2026-03-15T21:20:42.241820Z INFO property_map_server: PocketBase configured: http://pocketbase:8090
|
||||||
|
2026-03-15T21:20:42.361970Z INFO property_map_server::pocketbase: PocketBase users collection already has all required fields
|
||||||
|
2026-03-15T21:20:42.370041Z INFO property_map_server::pocketbase: PocketBase collection 'saved_searches' API rules updated
|
||||||
|
2026-03-15T21:20:42.375199Z INFO property_map_server::pocketbase: PocketBase collection 'saved_properties' API rules updated
|
||||||
|
2026-03-15T21:20:42.642209Z INFO property_map_server::pocketbase: PocketBase meta.appURL set to https://perfect-postcodes.co.uk/pb
|
||||||
|
2026-03-15T21:20:42.651503Z INFO property_map_server::pocketbase: PocketBase OAuth configured on users collection
|
||||||
|
2026-03-15T21:20:42.651536Z INFO property_map_server: Gemini configured (model: gemini-3-flash-preview)
|
||||||
|
2026-03-15T21:20:42.651550Z INFO property_map_server: Loading travel time data from /app/data/travel-times
|
||||||
|
2026-03-15T21:20:42.658771Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="bicycle" destinations=2780
|
||||||
|
2026-03-15T21:20:42.658942Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="walking" destinations=350
|
||||||
|
2026-03-15T21:20:42.659104Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="car" destinations=355
|
||||||
|
2026-03-15T21:20:42.661000Z INFO property_map_server::data::travel_time: Travel time mode discovered mode="transit" destinations=1849
|
||||||
|
2026-03-15T21:20:42.661031Z INFO property_map_server: Travel time store loaded modes=4
|
||||||
|
2026-03-15T21:20:42.661088Z INFO property_map_server: Precomputed AI filters system prompt
|
||||||
|
2026-03-15T21:20:43.239746Z WARN property_map_server::pocketbase: PocketBase invites count query failed: 400 Bad Request
|
||||||
|
2026-03-15T21:20:47.481371Z INFO property_map_server: All memory pages locked (mlockall)
|
||||||
|
2026-03-15T21:20:47.481419Z INFO property_map_server: Server listening on 0.0.0.0:8001
|
||||||
|
|
@ -168,7 +168,12 @@ impl PostcodeData {
|
||||||
local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east));
|
local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok::<_, anyhow::Error>((local_postcodes, local_polygons, local_centroids, local_aabbs))
|
Ok::<_, anyhow::Error>((
|
||||||
|
local_postcodes,
|
||||||
|
local_polygons,
|
||||||
|
local_centroids,
|
||||||
|
local_aabbs,
|
||||||
|
))
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -543,10 +543,9 @@ impl PropertyData {
|
||||||
listings_buy
|
listings_buy
|
||||||
.lazy()
|
.lazy()
|
||||||
.with_column(
|
.with_column(
|
||||||
(col("Asking price").cast(DataType::Float64)
|
(col("Asking price").cast(DataType::Float64) / col("Total floor area (sqm)"))
|
||||||
/ col("Total floor area (sqm)"))
|
.round(0)
|
||||||
.round(0)
|
.alias("Asking price per sqm"),
|
||||||
.alias("Asking price per sqm"),
|
|
||||||
)
|
)
|
||||||
.collect()
|
.collect()
|
||||||
.context("Failed to derive Asking price per sqm")?
|
.context("Failed to derive Asking price per sqm")?
|
||||||
|
|
|
||||||
|
|
@ -820,8 +820,7 @@ async fn poll_pocketbase_counts(state: &AppState) {
|
||||||
("type", "redeemed"),
|
("type", "redeemed"),
|
||||||
),
|
),
|
||||||
] {
|
] {
|
||||||
if let Some(total) = pb_count(&state.http_client, pb_url, &token, "invites", filter).await
|
if let Some(total) = pb_count(&state.http_client, pb_url, &token, "invites", filter).await {
|
||||||
{
|
|
||||||
gauge!(metric, labels.0 => labels.1.to_string()).set(total as f64);
|
gauge!(metric, labels.0 => labels.1.to_string()).set(total as f64);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,9 @@ use std::sync::Arc;
|
||||||
use axum::http::StatusCode;
|
use axum::http::StatusCode;
|
||||||
use axum::response::Json;
|
use axum::response::Json;
|
||||||
use axum::Extension;
|
use axum::Extension;
|
||||||
|
use metrics::counter;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
use metrics::counter;
|
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::auth::OptionalUser;
|
use crate::auth::OptionalUser;
|
||||||
|
|
|
||||||
|
|
@ -4,11 +4,11 @@ use axum::extract::Query;
|
||||||
use axum::http::StatusCode;
|
use axum::http::StatusCode;
|
||||||
use axum::response::{IntoResponse, Json};
|
use axum::response::{IntoResponse, Json};
|
||||||
use axum::Extension;
|
use axum::Extension;
|
||||||
|
use metrics::histogram;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
use metrics::histogram;
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
use crate::aggregation::Aggregator;
|
use crate::aggregation::Aggregator;
|
||||||
|
|
@ -230,8 +230,13 @@ pub async fn get_hexagons(
|
||||||
) {
|
) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let cell_id =
|
let cell_id = cell_for_row_cached(
|
||||||
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
|
row,
|
||||||
|
precomputed,
|
||||||
|
h3_res,
|
||||||
|
need_parent,
|
||||||
|
&mut h3_cache,
|
||||||
|
);
|
||||||
let agg = local_groups
|
let agg = local_groups
|
||||||
.entry(cell_id)
|
.entry(cell_id)
|
||||||
.or_insert_with(|| Aggregator::new(num_features));
|
.or_insert_with(|| Aggregator::new(num_features));
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,10 @@ use axum::extract::{Path, Query};
|
||||||
use axum::http::StatusCode;
|
use axum::http::StatusCode;
|
||||||
use axum::response::{IntoResponse, Json};
|
use axum::response::{IntoResponse, Json};
|
||||||
use axum::Extension;
|
use axum::Extension;
|
||||||
|
use metrics::histogram;
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
use metrics::histogram;
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
use crate::aggregation::Aggregator;
|
use crate::aggregation::Aggregator;
|
||||||
|
|
|
||||||
|
|
@ -67,9 +67,7 @@ enum FeatureAccum {
|
||||||
global_max: f32,
|
global_max: f32,
|
||||||
},
|
},
|
||||||
/// Enum: count occurrences per variant index.
|
/// Enum: count occurrences per variant index.
|
||||||
Enum {
|
Enum { value_counts: Vec<u64> },
|
||||||
value_counts: Vec<u64>,
|
|
||||||
},
|
|
||||||
/// Feature skipped (not in field_set).
|
/// Feature skipped (not in field_set).
|
||||||
Skip,
|
Skip,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue