try
Some checks failed
CI / Check (push) Failing after 3m22s
Build and publish Docker image / build-and-push (push) Successful in 7m25s

This commit is contained in:
Andras Schmelczer 2026-06-04 22:34:26 +01:00
parent 843d14b7ba
commit c938b71904
13 changed files with 698 additions and 109 deletions

View file

@@ -434,7 +434,8 @@
" color=\"white\" if v > 55 else \"black\")\n",
"fig.colorbar(im, ax=ax, label=\"% missing\", fraction=0.025, pad=0.01)\n",
"ax.set_title(\"Missing (null or empty-string) % by provider\")\n",
"plt.tight_layout(); plt.show()"
"plt.tight_layout()\n",
"plt.show()"
]
},
{
@@ -640,9 +641,11 @@
" ax.axvline(np.log10(v), color=\"red\", ls=\"--\", lw=1)\n",
" ax.text(np.log10(v), ax.get_ylim()[1] * 0.92, lab, rotation=90, fontsize=7,\n",
" color=\"red\", va=\"top\")\n",
"ax.set_xlabel(\"log10(Asking price)\"); ax.set_ylabel(\"count\")\n",
"ax.set_xlabel(\"log10(Asking price)\")\n",
"ax.set_ylabel(\"count\")\n",
"ax.set_title(\"Asking price distribution (log scale)\")\n",
"plt.tight_layout(); plt.show()"
"plt.tight_layout()\n",
"plt.show()"
]
},
{
@@ -764,9 +767,12 @@
"ax.scatter(x[impossible], y[impossible], s=14, alpha=0.8, color=\"red\", label=\"impossible (area < beds×8)\")\n",
"xs = np.linspace(0, x.max(), 60)\n",
"ax.plot(xs, np.maximum(10, xs * 8), \"k--\", lw=1, label=\"area = beds × 8 m²\")\n",
"ax.set_xlabel(\"Bedrooms\"); ax.set_ylabel(\"Total floor area (m²)\")\n",
"ax.set_title(\"Floor area vs bedrooms (≤160 m²)\"); ax.legend(fontsize=8)\n",
"plt.tight_layout(); plt.show()"
"ax.set_xlabel(\"Bedrooms\")\n",
"ax.set_ylabel(\"Total floor area (m²)\")\n",
"ax.set_title(\"Floor area vs bedrooms (≤160 m²)\")\n",
"ax.legend(fontsize=8)\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
@@ -947,9 +953,13 @@
"for prov, c in colors.items():\n",
" gp = g.filter(pl.col(\"provider\") == prov)\n",
" ax.scatter(gp[\"lon\"], gp[\"lat\"], s=2, alpha=0.30, color=c, label=prov)\n",
"ax.set_xlabel(\"lon\"); ax.set_ylabel(\"lat\"); ax.set_aspect(\"equal\", adjustable=\"datalim\")\n",
"ax.set_title(\"Listing coordinates by provider (25k sample)\"); ax.legend(markerscale=4, fontsize=8)\n",
"plt.tight_layout(); plt.show()"
"ax.set_xlabel(\"lon\")\n",
"ax.set_ylabel(\"lat\")\n",
"ax.set_aspect(\"equal\", adjustable=\"datalim\")\n",
"ax.set_title(\"Listing coordinates by provider (25k sample)\")\n",
"ax.legend(markerscale=4, fontsize=8)\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
@@ -2040,16 +2050,21 @@
" clean.filter((pl.col(\"Bedrooms\") == 0) & (pl.col(\"Property sub-type\") != \"Studio\")).height,\n",
" 0,\n",
"]\n",
"x = np.arange(len(labels)); w = 0.38\n",
"x = np.arange(len(labels))\n",
"w = 0.38\n",
"fig, ax = plt.subplots(figsize=(10, 3.6))\n",
"b1 = ax.bar(x - w / 2, before, w, label=\"raw\", color=\"#dc2626\")\n",
"b2 = ax.bar(x + w / 2, after, w, label=\"clean\", color=\"#16a34a\")\n",
"ax.set_xticks(x); ax.set_xticklabels(labels, fontsize=8)\n",
"ax.set_yscale(\"symlog\"); ax.set_ylabel(\"value (symlog)\")\n",
"ax.set_title(\"Before / after cleanup\"); ax.legend()\n",
"ax.set_xticks(x)\n",
"ax.set_xticklabels(labels, fontsize=8)\n",
"ax.set_yscale(\"symlog\")\n",
"ax.set_ylabel(\"value (symlog)\")\n",
"ax.set_title(\"Before / after cleanup\")\n",
"ax.legend()\n",
"for bars in (b1, b2):\n",
" ax.bar_label(bars, fmt=\"%.0f\", fontsize=7, padding=2)\n",
"plt.tight_layout(); plt.show()"
"plt.tight_layout()\n",
"plt.show()"
]
},
{