scraping and data
This commit is contained in:
parent
d98819b569
commit
8688b7475e
43 changed files with 4920 additions and 531 deletions
|
|
@ -1,19 +1,83 @@
|
|||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
import pytest
|
||||
import shapely
|
||||
|
||||
from pipeline.transform.tree_density import (
|
||||
STREET_TREE_COVERAGE_COL,
|
||||
STREET_TREE_DENSITY_COL,
|
||||
_add_nfi_batch,
|
||||
_coverage_percentile_expr,
|
||||
_metric_columns,
|
||||
_postcode_buffers,
|
||||
_postcode_density_percentile_col,
|
||||
_with_postcode_density_percentiles,
|
||||
_write_street_rollups,
|
||||
)
|
||||
|
||||
|
||||
def test_nfi_accumulation_adds_only_clipped_overlap_area() -> None:
    """Verify ``_add_nfi_batch`` accumulates clipped overlap area per postcode.

    Two postcodes, A at (0, 0) and B at (1000, 0), each get a 50 m buffer.
    Three scenarios are run against fresh accumulators:

    1. A woodland parcel that fully covers A's buffer.
    2. A woodland parcel that only slivers into B's buffer.
    3. A non-woodland parcel that fully covers A's buffer.
    """
    radius_m = 50
    postcodes = pl.DataFrame(
        {"postcode": ["A", "B"], "x": [0.0, 1000.0], "y": [0.0, 0.0]}
    )
    circles, tree = _postcode_buffers(postcodes, radius_m)
    buffer_area = math.pi * radius_m * radius_m

    def accumulate(parcel, category):
        # Run a single-feature batch into zeroed accumulators and return them.
        areas = np.zeros(2)
        counts = np.zeros(2, dtype=np.uint32)
        _add_nfi_batch(
            np.array([parcel], dtype=object),
            np.array([category], dtype=object),
            circles,
            tree,
            areas,
            counts,
            radius_m,
        )
        return areas, counts

    # Scenario 1: a large woodland square centred on postcode A fully covers
    # A's circle (1,000,000 sqm parcel).
    covering_parcel = shapely.box(-500, -500, 500, 500)
    areas, counts = accumulate(covering_parcel, "Woodland")
    # Only the clipped circle area is added (the 32-gon buffer approximates
    # the circle to ~1%), NOT the full 1,000,000 sqm polygon.
    assert areas[0] == pytest.approx(buffer_area, rel=1e-2)
    assert areas[0] <= buffer_area  # never exceeds the buffer area
    assert areas[1] == 0.0  # postcode B is 1km away, no overlap
    assert counts.tolist() == [1, 0]

    # Scenario 2: a large parcel that only slivers into B's circle must add
    # only the sliver, not its full area -- the failure mode the old centroid
    # path could not avoid. The parcel's left edge sits 10m inside B's circle.
    sliver_parcel = shapely.box(1040, -500, 2000, 500)
    areas, _ = accumulate(sliver_parcel, "Woodland")
    assert areas[0] == 0.0
    assert 0.0 < areas[1] < buffer_area  # tiny segment, far below 1M sqm

    # Scenario 3: non-woodland categories contribute nothing.
    areas, counts = accumulate(covering_parcel, "Non woodland")
    assert areas.tolist() == [0.0, 0.0]
    assert counts.tolist() == [0, 0]
|
||||
|
||||
|
||||
def test_coverage_percentile_expr_ranks_higher_coverage_higher() -> None:
|
||||
df = pl.DataFrame({"coverage": [0.0, 5.0, 10.0, None]})
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue