import polars as pl

from pipeline.transform.crime import find_street_crime_csvs, transform_crime


def test_find_street_crime_csvs_ignores_archive_sidecars(tmp_path):
    """Expect only the ``*-street.csv`` file back, with the other CSVs counted as ignored."""
    root = tmp_path / "crime"
    january = root / "2024-01"
    january.mkdir(parents=True)

    wanted = january / "2024-01-test-force-street.csv"
    # CSVs that sit alongside the street file (or above the month folder)
    # and must not be returned by the finder.
    sidecars = [
        january / "2024-01-test-force-outcomes.csv",
        january / "2024-01-test-force-stop-and-search.csv",
        root / "notes.csv",
    ]
    for path in (wanted, *sidecars):
        path.touch()

    found, skipped = find_street_crime_csvs(root)

    assert found == [wanted]
    assert skipped == len(sidecars)


def test_transform_crime_reads_only_street_crime_csvs(tmp_path):
    """Expect the parquet output to be built from the street CSV only.

    The fixture holds a street CSV (two Burglary rows for ``E01000001`` and
    one Robbery row with an empty LSOA code) next to an outcomes CSV. The
    asserted result is a single ``E01000001`` row with only a Burglary column.
    """
    source_root = tmp_path / "crime"
    january = source_root / "2024-01"
    january.mkdir(parents=True)

    street_lines = [
        "Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context",
        "1,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
        "2,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
        # Row without an LSOA code: the expected output has no trace of it.
        "3,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,,No LSOA,Robbery,Under investigation,",
    ]
    street_csv = january / "2024-01-test-force-street.csv"
    street_csv.write_text("".join(f"{line}\n" for line in street_lines))

    # Sidecar file with a different schema living in the same month folder.
    outcomes_csv = january / "2024-01-test-force-outcomes.csv"
    outcomes_csv.write_text(
        "Crime ID,Month,Reported by,Outcome type\n1,2024-01,Test Force,Charged\n"
    )

    parquet_path = tmp_path / "crime.parquet"
    transform_crime(source_root, parquet_path)

    rows = pl.read_parquet(parquet_path).to_dicts()

    # Presumably 2 burglaries in the single month scaled to a yearly figure.
    assert rows == [{"LSOA code": "E01000001", "Burglary (avg/yr)": 24.0}]


def test_transform_crime_annualises_over_all_valid_months(tmp_path):
    """Expect yearly averages computed across both month folders.

    January holds two Burglary rows for ``E01000001`` and one Robbery row for
    ``E01000002``; February holds one Robbery row for ``E01000002``. Each LSOA
    is expected to report 12.0 for its own crime type and 0.0 for the other.
    """
    columns = "Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context"
    source_root = tmp_path / "crime"

    # (month folder, street CSV file name, data rows)
    fixtures = [
        (
            "2024-01",
            "2024-01-test-force-street.csv",
            [
                "1,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
                "2,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
                "3,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000002,Other LSOA,Robbery,Under investigation,",
            ],
        ),
        (
            "2024-02",
            "2024-02-test-force-street.csv",
            [
                "4,2024-02,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000002,Other LSOA,Robbery,Under investigation,",
            ],
        ),
    ]

    # Create every month folder first, then fill in the CSVs.
    for folder_name, _, _ in fixtures:
        (source_root / folder_name).mkdir(parents=True)
    for folder_name, file_name, data_rows in fixtures:
        content = "".join(f"{line}\n" for line in [columns, *data_rows])
        (source_root / folder_name / file_name).write_text(content)

    parquet_path = tmp_path / "crime.parquet"
    transform_crime(source_root, parquet_path)

    # Sort so the comparison does not depend on output row order.
    frame = pl.read_parquet(parquet_path).sort("LSOA code")
    rows = frame.to_dicts()

    expected = [
        {
            "LSOA code": "E01000001",
            "Burglary (avg/yr)": 12.0,
            "Robbery (avg/yr)": 0.0,
        },
        {
            "LSOA code": "E01000002",
            "Burglary (avg/yr)": 0.0,
            "Robbery (avg/yr)": 12.0,
        },
    ]
    assert rows == expected


def test_transform_crime_fails_without_valid_months(tmp_path):
    """Expect a ``ValueError`` when the only data row has an empty Month field."""
    source_root = tmp_path / "crime"
    january = source_root / "2024-01"
    january.mkdir(parents=True)

    csv_lines = [
        "Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context",
        # The Month column (second field) is deliberately left blank.
        "1,,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
    ]
    street_csv = january / "2024-01-test-force-street.csv"
    street_csv.write_text("".join(f"{line}\n" for line in csv_lines))

    parquet_path = tmp_path / "crime.parquet"

    # Capture the error so the checks below run outside the handler.
    raised = None
    try:
        transform_crime(source_root, parquet_path)
    except ValueError as exc:
        raised = exc

    if raised is None:
        raise AssertionError("Expected ValueError")
    assert "No valid crime months" in str(raised)