diff --git a/analysis.ipynb b/analysis.ipynb index 3b4a58a..b209e53 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -99,7 +99,7 @@ "row_count = lf.select(pl.len()).collect().item()\n", "london_count = lf.filter(LONDON_FILTER).select(pl.len()).collect().item()\n", "print(f\"Total transactions: {row_count:,}\")\n", - "print(f\"London transactions: {london_count:,} ({london_count/row_count*100:.1f}%)\")" + "print(f\"London transactions: {london_count:,} ({london_count / row_count * 100:.1f}%)\")" ] }, { @@ -208,6 +208,7 @@ " pl.col(\"price\").quantile(0.95).alias(\"p95\"),\n", " ).collect()\n", "\n", + "\n", "print(\"=== National Price Statistics ===\")\n", "display(get_price_stats(lf))\n", "print(\"\\n=== London Price Statistics ===\")\n", @@ -1115,16 +1116,36 @@ " lazy_frame.group_by(\"property_type\")\n", " .agg(pl.len().alias(\"count\"))\n", " .sort(\"count\", descending=True)\n", - " .with_columns(pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\"))\n", + " .with_columns(\n", + " pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\")\n", + " )\n", " .collect()\n", " )\n", "\n", + "\n", "property_types_national = get_property_types(lf)\n", "property_types_london = get_property_types(lf.filter(LONDON_FILTER))\n", "\n", "fig = make_subplots(rows=1, cols=2, subplot_titles=(\"National\", \"London\"))\n", - "fig.add_trace(go.Bar(x=property_types_national[\"type_name\"], y=property_types_national[\"count\"], name=\"National\"), row=1, col=1)\n", - "fig.add_trace(go.Bar(x=property_types_london[\"type_name\"], y=property_types_london[\"count\"], name=\"London\", marker_color=\"crimson\"), row=1, col=2)\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=property_types_national[\"type_name\"],\n", + " y=property_types_national[\"count\"],\n", + " name=\"National\",\n", + " ),\n", + " row=1,\n", + " col=1,\n", + ")\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=property_types_london[\"type_name\"],\n", + " y=property_types_london[\"count\"],\n", + " name=\"London\",\n", + " marker_color=\"crimson\",\n", + " ),\n", + " row=1,\n", + " col=2,\n", + ")\n", "fig.update_layout(title_text=\"Property Type Distribution\", showlegend=False, height=400)\n", "fig.show()" ] @@ -2013,12 +2034,34 @@ " .collect()\n", " )\n", "\n", + "\n", "tenure_national = get_tenure(lf)\n", "tenure_london = get_tenure(lf.filter(LONDON_FILTER))\n", "\n", - "fig = make_subplots(rows=1, cols=2, specs=[[{\"type\": \"pie\"}, {\"type\": \"pie\"}]], subplot_titles=(\"National\", \"London\"))\n", - "fig.add_trace(go.Pie(labels=tenure_national[\"tenure_name\"], values=tenure_national[\"count\"], name=\"National\"), row=1, col=1)\n", - "fig.add_trace(go.Pie(labels=tenure_london[\"tenure_name\"], values=tenure_london[\"count\"], name=\"London\"), row=1, col=2)\n", + "fig = make_subplots(\n", + " rows=1,\n", + " cols=2,\n", + " specs=[[{\"type\": \"pie\"}, {\"type\": \"pie\"}]],\n", + " subplot_titles=(\"National\", \"London\"),\n", + ")\n", + "fig.add_trace(\n", + " go.Pie(\n", + " labels=tenure_national[\"tenure_name\"],\n", + " values=tenure_national[\"count\"],\n", + " name=\"National\",\n", + " ),\n", + " row=1,\n", + " col=1,\n", + ")\n", + "fig.add_trace(\n", + " go.Pie(\n", + " labels=tenure_london[\"tenure_name\"],\n", + " values=tenure_london[\"count\"],\n", + " name=\"London\",\n", + " ),\n", + " row=1,\n", + " col=2,\n", + ")\n", "fig.update_layout(title_text=\"Freehold vs Leasehold\", height=400)\n", "fig.show()" ] @@ -2958,10 +3001,16 @@ " .collect()\n", ")\n", "\n", - "fig = px.bar(top_counties.to_pandas(), x=\"count\", y=\"county\", orientation=\"h\",\n", + "fig = px.bar(\n", + " top_counties.to_pandas(),\n", + " x=\"count\",\n", + " y=\"county\",\n", + " orientation=\"h\",\n", " title=\"Top 20 Counties by Transaction Volume\",\n", - " color=\"avg_price\", color_continuous_scale=\"Blues\",\n", - " labels={\"count\": \"Transactions\", \"county\": \"County\", \"avg_price\": \"Avg Price\"})\n", + " color=\"avg_price\",\n", + " color_continuous_scale=\"Blues\",\n", + " labels={\"count\": \"Transactions\", \"county\": \"County\", \"avg_price\": \"Avg Price\"},\n", + ")\n", "fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=600)\n", "fig.show()" ] @@ -4591,15 +4640,29 @@ "london_boroughs = (\n", " lf.filter(LONDON_FILTER)\n", " .group_by(\"district\")\n", - " .agg(pl.len().alias(\"count\"), pl.col(\"price\").mean().alias(\"avg_price\"), pl.col(\"price\").median().alias(\"median_price\"))\n", + " .agg(\n", + " pl.len().alias(\"count\"),\n", + " pl.col(\"price\").mean().alias(\"avg_price\"),\n", + " pl.col(\"price\").median().alias(\"median_price\"),\n", + " )\n", " .sort(\"avg_price\", descending=True)\n", " .collect()\n", ")\n", "\n", - "fig = px.bar(london_boroughs.to_pandas(), x=\"avg_price\", y=\"district\", orientation=\"h\",\n", + "fig = px.bar(\n", + " london_boroughs.to_pandas(),\n", + " x=\"avg_price\",\n", + " y=\"district\",\n", + " orientation=\"h\",\n", " title=\"London Boroughs by Average Price\",\n", - " color=\"count\", color_continuous_scale=\"Reds\",\n", - " labels={\"avg_price\": \"Average Price (£)\", \"district\": \"Borough\", \"count\": \"Transactions\"})\n", + " color=\"count\",\n", + " color_continuous_scale=\"Reds\",\n", + " labels={\n", + " \"avg_price\": \"Average Price (£)\",\n", + " \"district\": \"Borough\",\n", + " \"count\": \"Transactions\",\n", + " },\n", + ")\n", "fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n", "fig.show()" ] @@ -4622,11 +4685,16 @@ " return (\n", " lazy_frame.with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n", " .group_by(\"year\")\n", - " .agg(pl.len().alias(\"count\"), pl.col(\"price\").mean().alias(\"avg_price\"), pl.col(\"price\").median().alias(\"median_price\"))\n", + " .agg(\n", + " pl.len().alias(\"count\"),\n", + " pl.col(\"price\").mean().alias(\"avg_price\"),\n", + " pl.col(\"price\").median().alias(\"median_price\"),\n", + " )\n", " .sort(\"year\")\n", " .collect()\n", " )\n", "\n", + "\n", "yearly_national = get_yearly_stats(lf)\n", "yearly_london = get_yearly_stats(lf.filter(LONDON_FILTER))" ] @@ -5473,9 +5541,29 @@ ], "source": [ "fig = go.Figure()\n", - "fig.add_trace(go.Scatter(x=yearly_national[\"year\"], y=yearly_national[\"avg_price\"], name=\"National\", mode=\"lines+markers\"))\n", - "fig.add_trace(go.Scatter(x=yearly_london[\"year\"], y=yearly_london[\"avg_price\"], name=\"London\", mode=\"lines+markers\", line=dict(color=\"crimson\")))\n", - "fig.update_layout(title=\"Average Price by Year\", xaxis_title=\"Year\", yaxis_title=\"Average Price (£)\", height=500)\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=yearly_national[\"year\"],\n", + " y=yearly_national[\"avg_price\"],\n", + " name=\"National\",\n", + " mode=\"lines+markers\",\n", + " )\n", + ")\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=yearly_london[\"year\"],\n", + " y=yearly_london[\"avg_price\"],\n", + " name=\"London\",\n", + " mode=\"lines+markers\",\n", + " line=dict(color=\"crimson\"),\n", + " )\n", + ")\n", + "fig.update_layout(\n", + " title=\"Average Price by Year\",\n", + " xaxis_title=\"Year\",\n", + " yaxis_title=\"Average Price (£)\",\n", + " height=500,\n", + ")\n", "fig.show()" ] }, @@ -6333,14 +6421,38 @@ } ], "source": [ - "yearly_national_pct = yearly_national.with_columns((pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\"))\n", - "yearly_london_pct = yearly_london.with_columns((pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\"))\n", + "yearly_national_pct = yearly_national.with_columns(\n", + " (pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\")\n", + ")\n", + "yearly_london_pct = yearly_london.with_columns(\n", + " (pl.col(\"avg_price\").pct_change() * 100).alias(\"yoy_change\")\n", + ")\n", "\n", "fig = go.Figure()\n", - "fig.add_trace(go.Bar(x=yearly_national_pct[\"year\"], y=yearly_national_pct[\"yoy_change\"], name=\"National\", opacity=0.7))\n", - "fig.add_trace(go.Bar(x=yearly_london_pct[\"year\"], y=yearly_london_pct[\"yoy_change\"], name=\"London\", opacity=0.7))\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=yearly_national_pct[\"year\"],\n", + " y=yearly_national_pct[\"yoy_change\"],\n", + " name=\"National\",\n", + " opacity=0.7,\n", + " )\n", + ")\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=yearly_london_pct[\"year\"],\n", + " y=yearly_london_pct[\"yoy_change\"],\n", + " name=\"London\",\n", + " opacity=0.7,\n", + " )\n", + ")\n", "fig.add_hline(y=0, line_dash=\"dash\", line_color=\"gray\")\n", - "fig.update_layout(title=\"Year-over-Year Price Change (%)\", xaxis_title=\"Year\", yaxis_title=\"Change (%)\", barmode=\"group\", height=500)\n", + "fig.update_layout(\n", + " title=\"Year-over-Year Price Change (%)\",\n", + " xaxis_title=\"Year\",\n", + " yaxis_title=\"Change (%)\",\n", + " barmode=\"group\",\n", + " height=500,\n", + ")\n", "fig.show()" ] }, @@ -7448,18 +7560,43 @@ " lazy_frame.filter(pl.col(\"date_of_transfer\").dt.year() >= start_year)\n", " .with_columns(pl.col(\"date_of_transfer\").dt.truncate(\"1mo\").alias(\"month\"))\n", " .group_by(\"month\")\n", - " .agg(pl.len().alias(\"count\"), pl.col(\"price\").mean().alias(\"avg_price\"), pl.col(\"price\").median().alias(\"median_price\"))\n", + " .agg(\n", + " pl.len().alias(\"count\"),\n", + " pl.col(\"price\").mean().alias(\"avg_price\"),\n", + " pl.col(\"price\").median().alias(\"median_price\"),\n", + " )\n", " .sort(\"month\")\n", " .collect()\n", " )\n", "\n", + "\n", "monthly_national = get_monthly_stats(lf)\n", "monthly_london = get_monthly_stats(lf.filter(LONDON_FILTER))\n", "\n", "fig = go.Figure()\n", - "fig.add_trace(go.Scatter(x=monthly_national[\"month\"], y=monthly_national[\"avg_price\"], name=\"National\", mode=\"lines\"))\n", - "fig.add_trace(go.Scatter(x=monthly_london[\"month\"], y=monthly_london[\"avg_price\"], name=\"London\", mode=\"lines\", line=dict(color=\"crimson\")))\n", - "fig.update_layout(title=\"Monthly Average Price (2015 onwards)\", xaxis_title=\"Month\", yaxis_title=\"Average Price (£)\", height=500)\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=monthly_national[\"month\"],\n", + " y=monthly_national[\"avg_price\"],\n", + " name=\"National\",\n", + " mode=\"lines\",\n", + " )\n", + ")\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=monthly_london[\"month\"],\n", + " y=monthly_london[\"avg_price\"],\n", + " name=\"London\",\n", + " mode=\"lines\",\n", + " line=dict(color=\"crimson\"),\n", + " )\n", + ")\n", + "fig.update_layout(\n", + " title=\"Monthly Average Price (2015 onwards)\",\n", + " xaxis_title=\"Month\",\n", + " yaxis_title=\"Average Price (£)\",\n", + " height=500,\n", + ")\n", "fig.show()" ] }, @@ -8431,9 +8568,19 @@ " .collect()\n", ")\n", "\n", - "fig = px.line(yearly_by_type_london.to_pandas(), x=\"year\", y=\"avg_price\", color=\"type_name\",\n", + "fig = px.line(\n", + " yearly_by_type_london.to_pandas(),\n", + " x=\"year\",\n", + " y=\"avg_price\",\n", + " color=\"type_name\",\n", " title=\"London: Average Price by Property Type Over Time\",\n", - " labels={\"avg_price\": \"Average Price (£)\", \"year\": \"Year\", \"type_name\": \"Property Type\"}, markers=True)\n", + " labels={\n", + " \"avg_price\": \"Average Price (£)\",\n", + " \"year\": \"Year\",\n", + " \"type_name\": \"Property Type\",\n", + " },\n", + " markers=True,\n", + ")\n", "fig.update_layout(height=500)\n", "fig.show()" ] @@ -9539,12 +9686,35 @@ } ], "source": [ - "monthly_london_rolling = monthly_london.with_columns(pl.col(\"avg_price\").rolling_mean(window_size=12).alias(\"rolling_12m_avg\"))\n", + "monthly_london_rolling = monthly_london.with_columns(\n", + " pl.col(\"avg_price\").rolling_mean(window_size=12).alias(\"rolling_12m_avg\")\n", + ")\n", "\n", "fig = go.Figure()\n", - "fig.add_trace(go.Scatter(x=monthly_london_rolling[\"month\"], y=monthly_london_rolling[\"avg_price\"], name=\"Monthly\", mode=\"lines\", opacity=0.5))\n", - "fig.add_trace(go.Scatter(x=monthly_london_rolling[\"month\"], y=monthly_london_rolling[\"rolling_12m_avg\"], name=\"12-Month Rolling Avg\", mode=\"lines\", line=dict(width=3, color=\"crimson\")))\n", - "fig.update_layout(title=\"London: Monthly Price with 12-Month Rolling Average\", xaxis_title=\"Month\", yaxis_title=\"Average Price (£)\", height=500)\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=monthly_london_rolling[\"month\"],\n", + " y=monthly_london_rolling[\"avg_price\"],\n", + " name=\"Monthly\",\n", + " mode=\"lines\",\n", + " opacity=0.5,\n", + " )\n", + ")\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=monthly_london_rolling[\"month\"],\n", + " y=monthly_london_rolling[\"rolling_12m_avg\"],\n", + " name=\"12-Month Rolling Avg\",\n", + " mode=\"lines\",\n", + " line=dict(width=3, color=\"crimson\"),\n", + " )\n", + ")\n", + "fig.update_layout(\n", + " title=\"London: Monthly Price with 12-Month Rolling Average\",\n", + " xaxis_title=\"Month\",\n", + " yaxis_title=\"Average Price (£)\",\n", + " height=500,\n", + ")\n", "fig.show()" ] }, @@ -10422,15 +10592,39 @@ "yearly_indexed = yearly_national.with_columns(\n", " (pl.col(\"avg_price\") / base_price_national * 100).alias(\"national_index\")\n", ").join(\n", - " yearly_london.with_columns((pl.col(\"avg_price\") / base_price_london * 100).alias(\"london_index\")).select(\"year\", \"london_index\"),\n", - " on=\"year\"\n", + " yearly_london.with_columns(\n", + " (pl.col(\"avg_price\") / base_price_london * 100).alias(\"london_index\")\n", + " ).select(\"year\", \"london_index\"),\n", + " on=\"year\",\n", ")\n", "\n", "fig = go.Figure()\n", - "fig.add_trace(go.Scatter(x=yearly_indexed[\"year\"], y=yearly_indexed[\"national_index\"], name=\"National\", mode=\"lines+markers\"))\n", - "fig.add_trace(go.Scatter(x=yearly_indexed[\"year\"], y=yearly_indexed[\"london_index\"], name=\"London\", mode=\"lines+markers\", line=dict(color=\"crimson\")))\n", - "fig.add_hline(y=100, line_dash=\"dash\", line_color=\"gray\", annotation_text=\"1995 Baseline\")\n", - "fig.update_layout(title=\"Price Index (1995 = 100)\", xaxis_title=\"Year\", yaxis_title=\"Index\", height=500)\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=yearly_indexed[\"year\"],\n", + " y=yearly_indexed[\"national_index\"],\n", + " name=\"National\",\n", + " mode=\"lines+markers\",\n", + " )\n", + ")\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=yearly_indexed[\"year\"],\n", + " y=yearly_indexed[\"london_index\"],\n", + " name=\"London\",\n", + " mode=\"lines+markers\",\n", + " line=dict(color=\"crimson\"),\n", + " )\n", + ")\n", + "fig.add_hline(\n", + " y=100, line_dash=\"dash\", line_color=\"gray\", annotation_text=\"1995 Baseline\"\n", + ")\n", + "fig.update_layout(\n", + " title=\"Price Index (1995 = 100)\",\n", + " xaxis_title=\"Year\",\n", + " yaxis_title=\"Index\",\n", + " height=500,\n", + ")\n", "fig.show()" ] }, @@ -12076,12 +12270,24 @@ " .collect()\n", ")\n", "\n", - "flats_by_borough = borough_by_type.filter(pl.col(\"property_type\") == \"F\").sort(\"median_price\")\n", + "flats_by_borough = borough_by_type.filter(pl.col(\"property_type\") == \"F\").sort(\n", + " \"median_price\"\n", + ")\n", "\n", - "fig = px.bar(flats_by_borough.to_pandas(), x=\"median_price\", y=\"district\", orientation=\"h\",\n", + "fig = px.bar(\n", + " flats_by_borough.to_pandas(),\n", + " x=\"median_price\",\n", + " y=\"district\",\n", + " orientation=\"h\",\n", " title=\"Current Median Flat Prices by Borough (2023+)\",\n", - " color=\"count\", color_continuous_scale=\"Viridis\",\n", - " labels={\"median_price\": \"Median Price (£)\", \"district\": \"Borough\", \"count\": \"Sales Volume\"})\n", + " color=\"count\",\n", + " color_continuous_scale=\"Viridis\",\n", + " labels={\n", + " \"median_price\": \"Median Price (£)\",\n", + " \"district\": \"Borough\",\n", + " \"count\": \"Sales Volume\",\n", + " },\n", + ")\n", "fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n", "fig.show()" ] @@ -13011,14 +13217,21 @@ ], "source": [ "# Heatmap: Price by borough and property type\n", - "pivot_data = borough_by_type.pivot(on=\"type_name\", index=\"district\", values=\"median_price\").sort(\"Flat/Maisonette\", nulls_last=True)\n", + "pivot_data = borough_by_type.pivot(\n", + " on=\"type_name\", index=\"district\", values=\"median_price\"\n", + ").sort(\"Flat/Maisonette\", nulls_last=True)\n", "\n", "fig = px.imshow(\n", - " pivot_data.select([\"Flat/Maisonette\", \"Terraced\", \"Semi-detached\", \"Detached\"]).to_pandas(),\n", + " pivot_data.select(\n", + " [\"Flat/Maisonette\", \"Terraced\", \"Semi-detached\", \"Detached\"]\n", + " ).to_pandas(),\n", " y=pivot_data[\"district\"].to_list(),\n", " x=[\"Flat\", \"Terraced\", \"Semi-detached\", \"Detached\"],\n", - " color_continuous_scale=\"RdYlGn_r\", aspect=\"auto\",\n", - " title=\"Median Price by Borough and Property Type (2023+)\", labels={\"color\": \"Price (£)\"})\n", + " color_continuous_scale=\"RdYlGn_r\",\n", + " aspect=\"auto\",\n", + " title=\"Median Price by Borough and Property Type (2023+)\",\n", + " labels={\"color\": \"Price (£)\"},\n", + ")\n", "fig.update_layout(height=900)\n", "fig.show()" ] @@ -13083,6 +13296,7 @@ " return ((end_price / start_price) ** (1 / years) - 1) * 100\n", " return None\n", "\n", + "\n", "borough_yearly_all = (\n", " lf.filter(LONDON_FILTER)\n", " .with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n", @@ -13101,8 +13315,16 @@ " if len(year_data) > 0:\n", " prices[year] = year_data[\"median_price\"][0]\n", " if all(year in prices for year in years_needed):\n", - " growth_data.append({\"district\": borough, \"price_2014\": prices[2014], \"price_2019\": prices[2019], \"price_2024\": prices[2024],\n", - " \"cagr_10yr\": calculate_cagr(prices[2014], prices[2024], 10), \"cagr_5yr\": calculate_cagr(prices[2019], prices[2024], 5)})\n", + " growth_data.append(\n", + " {\n", + " \"district\": borough,\n", + " \"price_2014\": prices[2014],\n", + " \"price_2019\": prices[2019],\n", + " \"price_2024\": prices[2024],\n", + " \"cagr_10yr\": calculate_cagr(prices[2014], prices[2024], 10),\n", + " \"cagr_5yr\": calculate_cagr(prices[2019], prices[2024], 5),\n", + " }\n", + " )\n", "\n", "growth_df = pl.DataFrame(growth_data).sort(\"cagr_5yr\", descending=True)\n", "growth_df" @@ -14077,10 +14299,16 @@ ], "source": [ "# 5-year vs 10-year CAGR scatter\n", - "fig = px.scatter(growth_df.to_pandas(), x=\"cagr_10yr\", y=\"cagr_5yr\", text=\"district\",\n", + "fig = px.scatter(\n", + " growth_df.to_pandas(),\n", + " x=\"cagr_10yr\",\n", + " y=\"cagr_5yr\",\n", + " text=\"district\",\n", " title=\"Borough Price Growth: 5-Year vs 10-Year CAGR\",\n", " labels={\"cagr_10yr\": \"10-Year CAGR (%)\", \"cagr_5yr\": \"5-Year CAGR (%)\"},\n", - " color=\"price_2024\", color_continuous_scale=\"Viridis\")\n", + " color=\"price_2024\",\n", + " color_continuous_scale=\"Viridis\",\n", + ")\n", "fig.update_traces(textposition=\"top center\", textfont_size=8)\n", "fig.add_hline(y=0, line_dash=\"dash\", line_color=\"gray\")\n", "fig.add_vline(x=0, line_dash=\"dash\", line_color=\"gray\")\n", @@ -15045,11 +15273,20 @@ ], "source": [ "# 5-Year Price Growth Ranking\n", - "fig = px.bar(growth_df.sort(\"cagr_5yr\").to_pandas(), x=\"cagr_5yr\", y=\"district\", orientation=\"h\",\n", - " title=\"5-Year Price Growth by Borough (CAGR %)\", color=\"cagr_5yr\", color_continuous_scale=\"RdYlGn\",\n", - " labels={\"cagr_5yr\": \"5-Year CAGR (%)\", \"district\": \"Borough\"})\n", + "fig = px.bar(\n", + " growth_df.sort(\"cagr_5yr\").to_pandas(),\n", + " x=\"cagr_5yr\",\n", + " y=\"district\",\n", + " orientation=\"h\",\n", + " title=\"5-Year Price Growth by Borough (CAGR %)\",\n", + " color=\"cagr_5yr\",\n", + " color_continuous_scale=\"RdYlGn\",\n", + " labels={\"cagr_5yr\": \"5-Year CAGR (%)\", \"district\": \"Borough\"},\n", + ")\n", "fig.add_vline(x=0, line_dash=\"dash\", line_color=\"black\")\n", - "fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800, showlegend=False)\n", + "fig.update_layout(\n", + " yaxis={\"categoryorder\": \"total ascending\"}, height=800, showlegend=False\n", + ")\n", "fig.show()" ] }, @@ -16199,18 +16436,35 @@ "source": [ "# Property Type Mix by Borough\n", "property_mix = (\n", - " lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n", - " .group_by(\"district\", \"property_type\").agg(pl.len().alias(\"count\"))\n", - " .with_columns(pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\")).collect()\n", + " lf.filter(LONDON_FILTER)\n", + " .filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n", + " .group_by(\"district\", \"property_type\")\n", + " .agg(pl.len().alias(\"count\"))\n", + " .with_columns(pl.col(\"property_type\").replace(PROPERTY_TYPE_MAP).alias(\"type_name\"))\n", + " .collect()\n", ")\n", "totals = property_mix.group_by(\"district\").agg(pl.col(\"count\").sum().alias(\"total\"))\n", - "property_mix_pct = property_mix.join(totals, on=\"district\").with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\"))\n", - "flat_pct = property_mix_pct.filter(pl.col(\"property_type\") == \"F\").sort(\"percentage\", descending=True)\n", + "property_mix_pct = property_mix.join(totals, on=\"district\").with_columns(\n", + " (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\")\n", + ")\n", + "flat_pct = property_mix_pct.filter(pl.col(\"property_type\") == \"F\").sort(\n", + " \"percentage\", descending=True\n", + ")\n", "\n", - "fig = px.bar(property_mix_pct.to_pandas(), x=\"percentage\", y=\"district\", color=\"type_name\", orientation=\"h\",\n", + "fig = px.bar(\n", + " property_mix_pct.to_pandas(),\n", + " x=\"percentage\",\n", + " y=\"district\",\n", + " color=\"type_name\",\n", + " orientation=\"h\",\n", " title=\"Property Type Mix by Borough (2020+)\",\n", - " labels={\"percentage\": \"% of Transactions\", \"district\": \"Borough\", \"type_name\": \"Property Type\"},\n", - " category_orders={\"district\": flat_pct[\"district\"].to_list()})\n", + " labels={\n", + " \"percentage\": \"% of Transactions\",\n", + " \"district\": \"Borough\",\n", + " \"type_name\": \"Property Type\",\n", + " },\n", + " category_orders={\"district\": flat_pct[\"district\"].to_list()},\n", + ")\n", "fig.update_layout(height=900, barmode=\"stack\", legend_title=\"Property Type\")\n", "fig.show()" ] @@ -17194,19 +17448,38 @@ "source": [ "# Freehold vs Leasehold by Borough\n", "tenure_mix = (\n", - " lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n", - " .group_by(\"district\", \"duration\").agg(pl.len().alias(\"count\"))\n", - " .with_columns(pl.col(\"duration\").replace(TENURE_MAP).alias(\"tenure_name\")).collect()\n", + " lf.filter(LONDON_FILTER)\n", + " .filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n", + " .group_by(\"district\", \"duration\")\n", + " .agg(pl.len().alias(\"count\"))\n", + " .with_columns(pl.col(\"duration\").replace(TENURE_MAP).alias(\"tenure_name\"))\n", + " .collect()\n", + ")\n", + "totals_tenure = tenure_mix.group_by(\"district\").agg(\n", + " pl.col(\"count\").sum().alias(\"total\")\n", + ")\n", + "tenure_mix_pct = tenure_mix.join(totals_tenure, on=\"district\").with_columns(\n", + " (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\")\n", + ")\n", + "freehold_pct = tenure_mix_pct.filter(pl.col(\"duration\") == \"F\").sort(\n", + " \"percentage\", descending=True\n", ")\n", - "totals_tenure = tenure_mix.group_by(\"district\").agg(pl.col(\"count\").sum().alias(\"total\"))\n", - "tenure_mix_pct = tenure_mix.join(totals_tenure, on=\"district\").with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"percentage\"))\n", - "freehold_pct = tenure_mix_pct.filter(pl.col(\"duration\") == \"F\").sort(\"percentage\", descending=True)\n", "\n", - "fig = px.bar(tenure_mix_pct.to_pandas(), x=\"percentage\", y=\"district\", color=\"tenure_name\", orientation=\"h\",\n", + "fig = px.bar(\n", + " tenure_mix_pct.to_pandas(),\n", + " x=\"percentage\",\n", + " y=\"district\",\n", + " color=\"tenure_name\",\n", + " orientation=\"h\",\n", " title=\"Freehold vs Leasehold by Borough (2020+)\",\n", - " labels={\"percentage\": \"% of Transactions\", \"district\": \"Borough\", \"tenure_name\": \"Tenure\"},\n", + " labels={\n", + " \"percentage\": \"% of Transactions\",\n", + " \"district\": \"Borough\",\n", + " \"tenure_name\": \"Tenure\",\n", + " },\n", " category_orders={\"district\": freehold_pct[\"district\"].to_list()},\n", - " color_discrete_map={\"Freehold\": \"seagreen\", \"Leasehold\": \"coral\"})\n", + " color_discrete_map={\"Freehold\": \"seagreen\", \"Leasehold\": \"coral\"},\n", + ")\n", "fig.update_layout(height=900, barmode=\"stack\")\n", "fig.show()" ] @@ -18168,19 +18441,39 @@ "source": [ "# Market Activity Change (2024 vs 2019)\n", "volume_trend = (\n", - " lf.filter(LONDON_FILTER).with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n", - " .filter(pl.col(\"year\") >= 2019).group_by(\"district\", \"year\").agg(pl.len().alias(\"count\")).collect()\n", + " lf.filter(LONDON_FILTER)\n", + " .with_columns(pl.col(\"date_of_transfer\").dt.year().alias(\"year\"))\n", + " .filter(pl.col(\"year\") >= 2019)\n", + " .group_by(\"district\", \"year\")\n", + " .agg(pl.len().alias(\"count\"))\n", + " .collect()\n", + ")\n", + "volume_2019 = volume_trend.filter(pl.col(\"year\") == 2019).select(\n", + " \"district\", pl.col(\"count\").alias(\"count_2019\")\n", + ")\n", + "volume_2024 = volume_trend.filter(pl.col(\"year\") == 2024).select(\n", + " \"district\", pl.col(\"count\").alias(\"count_2024\")\n", + ")\n", + "volume_change = (\n", + " volume_2019.join(volume_2024, on=\"district\", how=\"inner\")\n", + " .with_columns(\n", + " (\n", + " (pl.col(\"count_2024\") - pl.col(\"count_2019\")) / pl.col(\"count_2019\") * 100\n", + " ).alias(\"pct_change\")\n", + " )\n", + " .sort(\"pct_change\", descending=True)\n", ")\n", - "volume_2019 = volume_trend.filter(pl.col(\"year\") == 2019).select(\"district\", pl.col(\"count\").alias(\"count_2019\"))\n", - "volume_2024 = volume_trend.filter(pl.col(\"year\") == 2024).select(\"district\", pl.col(\"count\").alias(\"count_2024\"))\n", - "volume_change = volume_2019.join(volume_2024, on=\"district\", how=\"inner\").with_columns(\n", - " ((pl.col(\"count_2024\") - pl.col(\"count_2019\")) / pl.col(\"count_2019\") * 100).alias(\"pct_change\")\n", - ").sort(\"pct_change\", descending=True)\n", "\n", - "fig = px.bar(volume_change.to_pandas(), x=\"pct_change\", y=\"district\", orientation=\"h\",\n", + "fig = px.bar(\n", + " volume_change.to_pandas(),\n", + " x=\"pct_change\",\n", + " y=\"district\",\n", + " orientation=\"h\",\n", " title=\"Market Activity Change: 2024 vs 2019 Transaction Volume (%)\",\n", - " color=\"pct_change\", color_continuous_scale=\"RdYlGn\",\n", - " labels={\"pct_change\": \"Volume Change (%)\", \"district\": \"Borough\"})\n", + " color=\"pct_change\",\n", + " color_continuous_scale=\"RdYlGn\",\n", + " labels={\"pct_change\": \"Volume Change (%)\", \"district\": \"Borough\"},\n", + ")\n", "fig.add_vline(x=0, line_dash=\"dash\", line_color=\"black\")\n", "fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n", "fig.show()" @@ -19120,17 +19413,32 @@ "source": [ "# New Build Share by Borough\n", "new_build_rate = (\n", - " lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n", - " .group_by(\"district\", \"old_new\").agg(pl.len().alias(\"count\")).collect()\n", + " lf.filter(LONDON_FILTER)\n", + " .filter(pl.col(\"date_of_transfer\").dt.year() >= 2020)\n", + " .group_by(\"district\", \"old_new\")\n", + " .agg(pl.len().alias(\"count\"))\n", + " .collect()\n", + ")\n", + "totals_nb = new_build_rate.group_by(\"district\").agg(\n", + " pl.col(\"count\").sum().alias(\"total\")\n", + ")\n", + "new_build_pct = (\n", + " new_build_rate.filter(pl.col(\"old_new\") == \"Y\")\n", + " .join(totals_nb, on=\"district\")\n", + " .with_columns((pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"new_build_pct\"))\n", + " .sort(\"new_build_pct\", descending=True)\n", ")\n", - "totals_nb = new_build_rate.group_by(\"district\").agg(pl.col(\"count\").sum().alias(\"total\"))\n", - "new_build_pct = new_build_rate.filter(pl.col(\"old_new\") == \"Y\").join(totals_nb, on=\"district\").with_columns(\n", - " (pl.col(\"count\") / pl.col(\"total\") * 100).alias(\"new_build_pct\")\n", - ").sort(\"new_build_pct\", descending=True)\n", "\n", - "fig = px.bar(new_build_pct.to_pandas(), x=\"new_build_pct\", y=\"district\", orientation=\"h\",\n", - " title=\"New Build Share by Borough (2020+)\", color=\"new_build_pct\", color_continuous_scale=\"Blues\",\n", - " labels={\"new_build_pct\": \"New Build %\", \"district\": \"Borough\"})\n", + "fig = px.bar(\n", + " new_build_pct.to_pandas(),\n", + " x=\"new_build_pct\",\n", + " y=\"district\",\n", + " orientation=\"h\",\n", + " title=\"New Build Share by Borough (2020+)\",\n", + " color=\"new_build_pct\",\n", + " color_continuous_scale=\"Blues\",\n", + " labels={\"new_build_pct\": \"New Build %\", \"district\": \"Borough\"},\n", + ")\n", "fig.update_layout(yaxis={\"categoryorder\": \"total ascending\"}, height=800)\n", "fig.show()" ] @@ -20260,21 +20568,66 @@ "source": [ "# Budget Finder: Borough Price Ranges\n", "recent_prices = (\n", - " lf.filter(LONDON_FILTER).filter(pl.col(\"date_of_transfer\").dt.year() >= 2023)\n", - " .group_by(\"district\").agg(pl.col(\"price\").quantile(0.25).alias(\"p25\"), pl.col(\"price\").median().alias(\"median\"), pl.col(\"price\").quantile(0.75).alias(\"p75\"))\n", - " .sort(\"median\").collect()\n", + " lf.filter(LONDON_FILTER)\n", + " .filter(pl.col(\"date_of_transfer\").dt.year() >= 2023)\n", + " .group_by(\"district\")\n", + " .agg(\n", + " pl.col(\"price\").quantile(0.25).alias(\"p25\"),\n", + " pl.col(\"price\").median().alias(\"median\"),\n", + " pl.col(\"price\").quantile(0.75).alias(\"p75\"),\n", + " )\n", + " .sort(\"median\")\n", + " .collect()\n", ")\n", "\n", "fig = go.Figure()\n", - "fig.add_trace(go.Bar(name=\"25th percentile\", y=recent_prices[\"district\"], x=recent_prices[\"p25\"], orientation=\"h\", marker_color=\"lightgreen\"))\n", - "fig.add_trace(go.Bar(name=\"Median\", y=recent_prices[\"district\"], x=recent_prices[\"median\"] - recent_prices[\"p25\"], orientation=\"h\", marker_color=\"steelblue\", base=recent_prices[\"p25\"]))\n", - "fig.add_trace(go.Bar(name=\"75th percentile\", y=recent_prices[\"district\"], x=recent_prices[\"p75\"] - recent_prices[\"median\"], orientation=\"h\", marker_color=\"coral\", base=recent_prices[\"median\"]))\n", + "fig.add_trace(\n", + " go.Bar(\n", + " name=\"25th percentile\",\n", + " y=recent_prices[\"district\"],\n", + " x=recent_prices[\"p25\"],\n", + " orientation=\"h\",\n", + " marker_color=\"lightgreen\",\n", + " )\n", + ")\n", + "fig.add_trace(\n", + " go.Bar(\n", + " name=\"Median\",\n", + " y=recent_prices[\"district\"],\n", + " x=recent_prices[\"median\"] - recent_prices[\"p25\"],\n", + " orientation=\"h\",\n", + " marker_color=\"steelblue\",\n", + " base=recent_prices[\"p25\"],\n", + " )\n", + ")\n", + "fig.add_trace(\n", + " go.Bar(\n", + " name=\"75th percentile\",\n", + " y=recent_prices[\"district\"],\n", + " x=recent_prices[\"p75\"] - recent_prices[\"median\"],\n", + " orientation=\"h\",\n", + " marker_color=\"coral\",\n", + " base=recent_prices[\"median\"],\n", + " )\n", + ")\n", "\n", "for budget in [300000, 500000, 750000, 1000000]:\n", - " fig.add_vline(x=budget, line_dash=\"dash\", line_color=\"gray\", annotation_text=f\"£{budget//1000}k\", annotation_position=\"top\")\n", + " fig.add_vline(\n", + " x=budget,\n", + " line_dash=\"dash\",\n", + " line_color=\"gray\",\n", + " annotation_text=f\"£{budget // 1000}k\",\n", + " annotation_position=\"top\",\n", + " )\n", "\n", - "fig.update_layout(title=\"Borough Price Ranges (2023+) - Find Your Budget\", xaxis_title=\"Price (£)\",\n", - " yaxis={\"categoryorder\": \"total ascending\"}, barmode=\"stack\", height=900, legend_title=\"Price Point\")\n", + "fig.update_layout(\n", + " title=\"Borough Price Ranges (2023+) - Find Your Budget\",\n", + " xaxis_title=\"Price (£)\",\n", + " yaxis={\"categoryorder\": \"total ascending\"},\n", + " barmode=\"stack\",\n", + " height=900,\n", + " legend_title=\"Price Point\",\n", + ")\n", "fig.show()" ] }, @@ -20288,18 +20641,43 @@ "comparison_data = (\n", " recent_prices.select(\"district\", \"median\")\n", " .join(growth_df.select(\"district\", \"cagr_5yr\"), on=\"district\", how=\"left\")\n", - " .join(freehold_pct.select(\"district\", \"percentage\").rename({\"percentage\": \"freehold_pct\"}), on=\"district\", how=\"left\")\n", + " .join(\n", + " freehold_pct.select(\"district\", \"percentage\").rename(\n", + " {\"percentage\": \"freehold_pct\"}\n", + " ),\n", + " on=\"district\",\n", + " how=\"left\",\n", + " )\n", " .join(volume_change.select(\"district\", \"count_2024\"), on=\"district\", how=\"left\")\n", " .join(new_build_pct.select(\"district\", \"new_build_pct\"), on=\"district\", how=\"left\")\n", ")\n", "\n", - "comparison_normalized = comparison_data.with_columns([\n", - " (100 - (pl.col(\"median\") - pl.col(\"median\").min()) / (pl.col(\"median\").max() - pl.col(\"median\").min()) * 100).alias(\"affordability_score\"),\n", - " ((pl.col(\"cagr_5yr\") - pl.col(\"cagr_5yr\").min()) / (pl.col(\"cagr_5yr\").max() - pl.col(\"cagr_5yr\").min()) * 100).alias(\"growth_score\"),\n", - " pl.col(\"freehold_pct\").alias(\"freehold_score\"),\n", - " ((pl.col(\"count_2024\") - pl.col(\"count_2024\").min()) / (pl.col(\"count_2024\").max() - pl.col(\"count_2024\").min()) * 100).alias(\"liquidity_score\"),\n", - " ((pl.col(\"new_build_pct\") - pl.col(\"new_build_pct\").min()) / (pl.col(\"new_build_pct\").max() - pl.col(\"new_build_pct\").min()) * 100).alias(\"development_score\"),\n", - "])" + "comparison_normalized = comparison_data.with_columns(\n", + " [\n", + " (\n", + " 100\n", + " - (pl.col(\"median\") - pl.col(\"median\").min())\n", + " / (pl.col(\"median\").max() - pl.col(\"median\").min())\n", + " * 100\n", + " ).alias(\"affordability_score\"),\n", + " (\n", + " (pl.col(\"cagr_5yr\") - pl.col(\"cagr_5yr\").min())\n", + " / (pl.col(\"cagr_5yr\").max() - pl.col(\"cagr_5yr\").min())\n", + " * 100\n", + " ).alias(\"growth_score\"),\n", + " pl.col(\"freehold_pct\").alias(\"freehold_score\"),\n", + " (\n", + " (pl.col(\"count_2024\") - pl.col(\"count_2024\").min())\n", + " / (pl.col(\"count_2024\").max() - pl.col(\"count_2024\").min())\n", + " * 100\n", + " ).alias(\"liquidity_score\"),\n", + " (\n", + " (pl.col(\"new_build_pct\") - pl.col(\"new_build_pct\").min())\n", + " / (pl.col(\"new_build_pct\").max() - pl.col(\"new_build_pct\").min())\n", + " * 100\n", + " ).alias(\"development_score\"),\n", + " ]\n", + ")" ] }, { @@ -21228,14 +21606,38 @@ ], "source": [ "# Borough Scores Heatmap\n", - "scores_only = comparison_normalized.select([\"district\", \"affordability_score\", \"growth_score\", \"freehold_score\", \"liquidity_score\", \"development_score\"]).drop_nulls().sort(\"affordability_score\", descending=True)\n", + "scores_only = (\n", + " comparison_normalized.select(\n", + " [\n", + " \"district\",\n", + " \"affordability_score\",\n", + " \"growth_score\",\n", + " \"freehold_score\",\n", + " \"liquidity_score\",\n", + " \"development_score\",\n", + " ]\n", + " )\n", + " .drop_nulls()\n", + " .sort(\"affordability_score\", descending=True)\n", + ")\n", "\n", "fig = px.imshow(\n", - " scores_only.select([\"affordability_score\", \"growth_score\", \"freehold_score\", \"liquidity_score\", \"development_score\"]).to_pandas(),\n", + " scores_only.select(\n", + " [\n", + " \"affordability_score\",\n", + " \"growth_score\",\n", + " \"freehold_score\",\n", + " \"liquidity_score\",\n", + " \"development_score\",\n", + " ]\n", + " ).to_pandas(),\n", " y=scores_only[\"district\"].to_list(),\n", " x=[\"Affordability\", \"5yr Growth\", \"Freehold %\", \"Liquidity\", \"New Builds\"],\n", - " color_continuous_scale=\"RdYlGn\", aspect=\"auto\",\n", - " title=\"Borough Scores Heatmap (Higher = Better)\", labels={\"color\": \"Score\"})\n", + " color_continuous_scale=\"RdYlGn\",\n", + " aspect=\"auto\",\n", + " title=\"Borough Scores Heatmap (Higher = Better)\",\n", + " labels={\"color\": \"Score\"},\n", + ")\n", "fig.update_layout(height=900)\n", "fig.show()" ] @@ -22258,17 +22660,49 @@ "median_afford = value_growth[\"affordability_score\"].median()\n", "median_growth = value_growth[\"growth_score\"].median()\n", "\n", - "fig = px.scatter(value_growth.to_pandas(), x=\"affordability_score\", y=\"growth_score\", text=\"district\",\n", - " size=\"liquidity_score\", color=\"freehold_score\", color_continuous_scale=\"Viridis\",\n", + "fig = px.scatter(\n", + " value_growth.to_pandas(),\n", + " x=\"affordability_score\",\n", + " y=\"growth_score\",\n", + " text=\"district\",\n", + " size=\"liquidity_score\",\n", + " color=\"freehold_score\",\n", + " color_continuous_scale=\"Viridis\",\n", " title=\"Value vs Growth Quadrant Analysis\",\n", - " labels={\"affordability_score\": \"Affordability (higher=cheaper)\", \"growth_score\": \"5yr Growth (higher=faster)\", \"liquidity_score\": \"Liquidity\", \"freehold_score\": \"Freehold %\"})\n", + " labels={\n", + " \"affordability_score\": \"Affordability (higher=cheaper)\",\n", + " \"growth_score\": \"5yr Growth (higher=faster)\",\n", + " \"liquidity_score\": \"Liquidity\",\n", + " \"freehold_score\": \"Freehold %\",\n", + " },\n", + ")\n", "\n", "fig.add_hline(y=median_growth, line_dash=\"dash\", line_color=\"gray\")\n", "fig.add_vline(x=median_afford, line_dash=\"dash\", line_color=\"gray\")\n", - "fig.add_annotation(x=75, y=85, text=\"Best Value
(Affordable+Growing)\", showarrow=False, font=dict(size=11))\n", - "fig.add_annotation(x=25, y=85, text=\"Premium Growth
(Expensive+Growing)\", showarrow=False, font=dict(size=11))\n", - "fig.add_annotation(x=75, y=15, text=\"Stable & Affordable
(Cheap+Steady)\", showarrow=False, font=dict(size=11))\n", - "fig.add_annotation(x=25, y=15, text=\"Caution
(Expensive+Slow)\", showarrow=False, font=dict(size=11))\n", + "fig.add_annotation(\n", + " x=75,\n", + " y=85,\n", + " text=\"Best Value
(Affordable+Growing)\",\n", + " showarrow=False,\n", + " font=dict(size=11),\n", + ")\n", + "fig.add_annotation(\n", + " x=25,\n", + " y=85,\n", + " text=\"Premium Growth
(Expensive+Growing)\",\n", + " showarrow=False,\n", + " font=dict(size=11),\n", + ")\n", + "fig.add_annotation(\n", + " x=75,\n", + " y=15,\n", + " text=\"Stable & Affordable
(Cheap+Steady)\",\n", + " showarrow=False,\n", + " font=dict(size=11),\n", + ")\n", + "fig.add_annotation(\n", + " x=25, y=15, text=\"Caution
(Expensive+Slow)\", showarrow=False, font=dict(size=11)\n", + ")\n", "fig.update_traces(textposition=\"top center\", textfont_size=8)\n", "fig.update_layout(height=700)\n", "fig.show()" @@ -22332,25 +22766,43 @@ ], "source": [ "# Borough Rankings with Composite Score\n", - "WEIGHTS = {\"affordability\": 0.30, \"growth\": 0.25, \"freehold\": 0.15, \"liquidity\": 0.15, \"development\": 0.15}\n", + "WEIGHTS = {\n", + " \"affordability\": 0.30,\n", + " \"growth\": 0.25,\n", + " \"freehold\": 0.15,\n", + " \"liquidity\": 0.15,\n", + " \"development\": 0.15,\n", + "}\n", "\n", "ranking = (\n", " comparison_normalized.drop_nulls()\n", " .with_columns(\n", - " (pl.col(\"affordability_score\") * WEIGHTS[\"affordability\"] +\n", - " pl.col(\"growth_score\") * WEIGHTS[\"growth\"] +\n", - " pl.col(\"freehold_score\") * WEIGHTS[\"freehold\"] +\n", - " pl.col(\"liquidity_score\") * WEIGHTS[\"liquidity\"] +\n", - " pl.col(\"development_score\") * WEIGHTS[\"development\"]).alias(\"composite_score\")\n", + " (\n", + " pl.col(\"affordability_score\") * WEIGHTS[\"affordability\"]\n", + " + pl.col(\"growth_score\") * WEIGHTS[\"growth\"]\n", + " + pl.col(\"freehold_score\") * WEIGHTS[\"freehold\"]\n", + " + pl.col(\"liquidity_score\") * WEIGHTS[\"liquidity\"]\n", + " + pl.col(\"development_score\") * WEIGHTS[\"development\"]\n", + " ).alias(\"composite_score\")\n", + " )\n", + " .select(\n", + " [\n", + " \"district\",\n", + " pl.col(\"median\").alias(\"median_price_2023\"),\n", + " \"cagr_5yr\",\n", + " \"freehold_pct\",\n", + " \"composite_score\",\n", + " ]\n", " )\n", - " .select([\"district\", pl.col(\"median\").alias(\"median_price_2023\"), \"cagr_5yr\", \"freehold_pct\", \"composite_score\"])\n", " .sort(\"composite_score\", descending=True)\n", " .with_row_index(\"rank\", offset=1)\n", ")\n", "\n", "print(\"Top 15 Boroughs by Composite Score:\")\n", - "print(\"Weights: Affordability 30%, Growth 25%, Freehold 15%, Liquidity 15%, Development 15%\")\n", - "print(\"=\"*80)\n", + "print(\n", + " \"Weights: Affordability 30%, Growth 25%, Freehold 15%, Liquidity 15%, Development 15%\"\n", + ")\n", + "print(\"=\" * 80)\n", "ranking.head(15)" ] }