import polars as pl

from pipeline.transform.crime import find_street_crime_csvs, transform_crime


def test_find_street_crime_csvs_ignores_archive_sidecars(tmp_path):
    """Check that only the ``*-street.csv`` file is returned by discovery.

    The fixture tree holds one street CSV plus three other CSVs (outcomes,
    stop-and-search, and a stray ``notes.csv`` at the top level). The test
    expects the street file alone in the returned list and the other three
    reflected in the returned ignored count.
    """
    root = tmp_path / "crime"
    january = root / "2024-01"
    january.mkdir(parents=True)

    expected_csv = january / "2024-01-test-force-street.csv"
    sidecars = [
        january / "2024-01-test-force-outcomes.csv",
        january / "2024-01-test-force-stop-and-search.csv",
        root / "notes.csv",
    ]
    for path in (expected_csv, *sidecars):
        path.touch()

    found, skipped = find_street_crime_csvs(root)

    assert found == [expected_csv]
    assert skipped == len(sidecars)


def test_transform_crime_reads_only_street_crime_csvs(tmp_path):
    """Check that ``transform_crime`` output reflects only the street CSV.

    A street CSV and an outcomes CSV (with a different header) are written
    side by side in the same month directory. The parquet output is expected
    to contain exactly one row: the burglary figure for ``E01000001``.
    """
    root = tmp_path / "crime"
    january = root / "2024-01"
    january.mkdir(parents=True)

    # Two burglaries share LSOA E01000001; the robbery row has a blank LSOA
    # code and does not appear in the expected output below.
    street_lines = [
        "Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context",
        "1,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
        "2,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
        "3,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,,No LSOA,Robbery,Under investigation,",
    ]
    street_csv = january / "2024-01-test-force-street.csv"
    street_csv.write_text("".join(line + "\n" for line in street_lines))

    # Sidecar file whose columns differ from the street schema.
    outcomes_csv = january / "2024-01-test-force-outcomes.csv"
    outcomes_csv.write_text(
        "Crime ID,Month,Reported by,Outcome type\n1,2024-01,Test Force,Charged\n"
    )

    parquet_path = tmp_path / "crime.parquet"
    transform_crime(root, parquet_path)

    rows = pl.read_parquet(parquet_path).to_dicts()
    expected_rows = [{"LSOA code": "E01000001", "Burglary (avg/yr)": 2.0}]

    assert rows == expected_rows