import numpy as np
import polars as pl
import pytest

from pipeline.utils.haversine import haversine_km, haversine_km_expr


class TestHaversineKm:
    """Checks for the numpy-based ``haversine_km`` function."""

    def test_same_point(self):
        """A point compared with itself yields a distance of zero."""
        origin_lat = np.array([51.5074])
        origin_lon = np.array([-0.1278])

        result = haversine_km(origin_lat, origin_lon, 51.5074, -0.1278)

        assert np.allclose(result, 0.0, atol=1e-10)

    def test_known_distance_london_to_paris(self):
        """London to Paris is expected to be roughly 344 km."""
        # Origin: London
        origin_lat = np.array([51.5074])
        origin_lon = np.array([-0.1278])
        # Destination: Paris
        dest_lat, dest_lon = 48.8566, 2.3522

        result = haversine_km(origin_lat, origin_lon, dest_lat, dest_lon)

        # Accept a 1% relative deviation from the reference value.
        assert np.allclose(result[0], 344, rtol=0.01)

    def test_known_distance_new_york_to_london(self):
        """New York to London is expected to be roughly 5570 km."""
        # Origin: New York
        origin_lat = np.array([40.7128])
        origin_lon = np.array([-74.0060])
        # Destination: London
        dest_lat, dest_lon = 51.5074, -0.1278

        result = haversine_km(origin_lat, origin_lon, dest_lat, dest_lon)

        # Accept a 1% relative deviation from the reference value.
        assert np.allclose(result[0], 5570, rtol=0.01)

    def test_multiple_points(self):
        """Several origins can be measured against one destination at once."""
        # Origins in order: London, Paris, NYC
        origin_lats = np.array([51.5074, 48.8566, 40.7128])
        origin_lons = np.array([-0.1278, 2.3522, -74.0060])
        # Destination: Edinburgh
        dest_lat, dest_lon = 55.9533, -3.1883

        result = haversine_km(origin_lats, origin_lons, dest_lat, dest_lon)
        from_london, from_paris, from_nyc = result[0], result[1], result[2]

        # Every origin differs from the destination, so no distance is zero.
        assert np.all(result > 0)

        # London is the closest origin (~530 km), then Paris, then NYC.
        assert from_london < from_paris < from_nyc
        assert np.allclose(from_london, 530, rtol=0.02)

    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        # Both points sit on the equator, one degree of longitude apart.
        origin_lat = np.array([0.0])
        origin_lon = np.array([0.0])
        dest_lon = 1.0

        result = haversine_km(origin_lat, origin_lon, 0.0, dest_lon)

        assert np.allclose(result[0], 111.2, rtol=0.01)


class TestHaversineKmExpr:
    """Checks for the Polars expression builder ``haversine_km_expr``."""

    @staticmethod
    def _distances(lats, lons, dest_lat, dest_lon):
        """Evaluate the expression over a lat/lon frame; return the "dist" column."""
        frame = pl.DataFrame({"lat": lats, "lon": lons})
        expr = haversine_km_expr("lat", "lon", dest_lat, dest_lon)
        return frame.select(expr.alias("dist"))["dist"]

    def test_same_point(self):
        """A point compared with itself yields a distance of zero."""
        column = self._distances([51.5074], [-0.1278], 51.5074, -0.1278)

        assert column[0] == pytest.approx(0.0, abs=1e-10)

    def test_known_distance_london_to_paris(self):
        """London to Paris is expected to be roughly 344 km."""
        column = self._distances([51.5074], [-0.1278], 48.8566, 2.3522)

        assert column[0] == pytest.approx(344, rel=0.01)

    def test_known_distance_new_york_to_london(self):
        """New York to London is expected to be roughly 5570 km."""
        column = self._distances([40.7128], [-74.0060], 51.5074, -0.1278)

        assert column[0] == pytest.approx(5570, rel=0.01)

    def test_multiple_points(self):
        """Several origins can be measured against one destination at once."""
        # Origins in order: London, Paris, NYC; destination: Edinburgh
        column = self._distances(
            [51.5074, 48.8566, 40.7128],
            [-0.1278, 2.3522, -74.0060],
            55.9533,
            -3.1883,
        )
        result = column.to_numpy()
        from_london, from_paris, from_nyc = result[0], result[1], result[2]

        # Every origin differs from the destination, so no distance is zero.
        assert np.all(result > 0)

        # London is the closest origin (~530 km), then Paris, then NYC.
        assert from_london < from_paris < from_nyc
        assert from_london == pytest.approx(530, rel=0.02)

    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        column = self._distances([0.0], [0.0], 0.0, 1.0)

        assert column[0] == pytest.approx(111.2, rel=0.01)


class TestHaversineConsistency:
    """Cross-checks the numpy and Polars implementations against each other."""

    def test_numpy_and_polars_match(self):
        """The two implementations agree on the same set of inputs."""
        # Shared inputs: five origins measured against Rome.
        origin_lats = np.array([51.5074, 48.8566, 40.7128, 55.9533, 52.5200])
        origin_lons = np.array([-0.1278, 2.3522, -74.0060, -3.1883, 13.4050])
        rome_lat = 41.9028
        rome_lon = 12.4964

        # Numpy path
        via_numpy = haversine_km(origin_lats, origin_lons, rome_lat, rome_lon)

        # Polars path
        frame = pl.DataFrame({"lat": origin_lats, "lon": origin_lons})
        expr = haversine_km_expr("lat", "lon", rome_lat, rome_lon)
        via_polars = frame.select(expr.alias("dist"))["dist"].to_numpy()

        # Tight tolerance: only floating-point noise may separate the two.
        assert np.allclose(via_numpy, via_polars, rtol=1e-10)