Format python
This commit is contained in:
parent
85f5770e09
commit
4c258018c3
17 changed files with 348 additions and 248 deletions
|
|
@@ -70,7 +70,9 @@ def _count_pois_per_postcode(
|
|||
pc_codes = postcodes_df["postcode"].to_list()
|
||||
|
||||
# Initialize result arrays
|
||||
result_counts = {group: np.zeros(n_postcodes, dtype=np.int32) for group in POI_GROUPS}
|
||||
result_counts = {
|
||||
group: np.zeros(n_postcodes, dtype=np.int32) for group in POI_GROUPS
|
||||
}
|
||||
|
||||
# Process in batches with progress
|
||||
batch_size = 50000
|
||||
|
|
@@ -83,7 +85,9 @@ def _count_pois_per_postcode(
|
|||
end_idx = min(start_idx + batch_size, n_postcodes)
|
||||
|
||||
if batch_idx % 5 == 0:
|
||||
print(f" Batch {batch_idx + 1}/{n_batches}: postcodes {start_idx:,} - {end_idx:,}")
|
||||
print(
|
||||
f" Batch {batch_idx + 1}/{n_batches}: postcodes {start_idx:,} - {end_idx:,}"
|
||||
)
|
||||
|
||||
# Process batch
|
||||
for i in range(start_idx, end_idx):
|
||||
|
|
@@ -109,12 +113,7 @@ def _count_pois_per_postcode(
|
|||
nearby = np.concatenate(nearby_indices)
|
||||
|
||||
# Vectorized distance calculation for all nearby POIs
|
||||
distances = haversine_km(
|
||||
poi_lats[nearby],
|
||||
poi_lngs[nearby],
|
||||
pc_lat,
|
||||
pc_lon
|
||||
)
|
||||
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lat, pc_lon)
|
||||
|
||||
# Filter by radius
|
||||
within_mask = distances <= radius_km
|
||||
|
|
@@ -147,13 +146,13 @@ def count_pois_within_radius(
|
|||
"""
|
||||
# Get unique postcodes with coordinates
|
||||
print("Deduplicating postcodes...")
|
||||
unique_postcodes = (
|
||||
properties
|
||||
.select(["postcode", "lat", "lon"])
|
||||
.unique(subset=["postcode"])
|
||||
unique_postcodes = properties.select(["postcode", "lat", "lon"]).unique(
|
||||
subset=["postcode"]
|
||||
)
|
||||
|
||||
print(f" {len(properties):,} properties → {len(unique_postcodes):,} unique postcodes")
|
||||
print(
|
||||
f" {len(properties):,} properties → {len(unique_postcodes):,} unique postcodes"
|
||||
)
|
||||
|
||||
# Count POIs per postcode
|
||||
postcode_counts = _count_pois_per_postcode(unique_postcodes, pois, radius_km)
|
||||
|
|
@@ -174,11 +173,7 @@ def count_pois_within_radius(
|
|||
result_lazy = (
|
||||
properties.lazy()
|
||||
.select("postcode")
|
||||
.join(
|
||||
pl.scan_parquet(tmp_path),
|
||||
on="postcode",
|
||||
how="left"
|
||||
)
|
||||
.join(pl.scan_parquet(tmp_path), on="postcode", how="left")
|
||||
.select(count_cols)
|
||||
.fill_null(0)
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue