use std::collections::{HashMap, HashSet};

use metrics::counter;
use rustc_hash::FxHashMap;
use tracing::error;

use crate::consts::PRICE_HISTORY_POINTS_LIMIT;
use crate::data::crime_by_year::CrimeByYearData;
use crate::data::{FeatureStats, PostcodePoiMetrics, PropertyData};

use super::hexagon_stats::{
    CrimeYearPoint, CrimeYearStats, EnumFeatureStats, HistogramStats, NumericFeatureStats,
    PricePoint,
};

/// Collect `(year, price)` points for the matching rows, thinning them out when there
/// are more than `PRICE_HISTORY_POINTS_LIMIT`.
///
/// The year is read from the feature registered as `"Date of last transaction"` in
/// `feature_name_to_index`; when that feature is not registered the result is empty.
/// Rows whose year or price is not finite are dropped. When too many points remain,
/// exactly `PRICE_HISTORY_POINTS_LIMIT` of them are kept, picked at evenly spaced
/// positions in row order.
pub fn extract_price_history(
    matching_rows: &[usize],
    data: &PropertyData,
    feature_name_to_index: &FxHashMap<String, usize>,
) -> Vec<PricePoint> {
    let year_feature = match feature_name_to_index.get("Date of last transaction") {
        Some(&idx) => idx,
        None => return Vec::new(),
    };

    // Keep only rows where both coordinates are usable numbers.
    let mut points: Vec<PricePoint> = Vec::new();
    for &row in matching_rows {
        let year = data.get_feature(row, year_feature);
        let price = data.last_known_price_raw(row);
        if !year.is_finite() || !price.is_finite() {
            continue;
        }
        points.push(PricePoint { year, price });
    }

    if points.len() <= PRICE_HISTORY_POINTS_LIMIT {
        return points;
    }

    // Fractional stride > 1.0, so `i * stride` truncates to an index below `points.len()`.
    let stride = points.len() as f64 / PRICE_HISTORY_POINTS_LIMIT as f64;
    let mut sampled = Vec::with_capacity(PRICE_HISTORY_POINTS_LIMIT);
    for i in 0..PRICE_HISTORY_POINTS_LIMIT {
        let source = &points[(i as f64 * stride) as usize];
        sampled.push(PricePoint {
            year: source.year,
            price: source.price,
        });
    }
    sampled
}

/// Per-feature accumulator kind, determined once before the row loop.
///
/// `compute_feature_stats` creates one value per feature index, updates it for every
/// matching row, and finally converts it into the response structs.
enum FeatureAccum {
    /// Numeric: track count, min, max, sum, histogram bins.
    Numeric {
        /// Number of finite values seen so far.
        count: usize,
        /// Smallest finite value seen; starts at `f32::INFINITY`.
        min_value: f32,
        /// Largest finite value seen; starts at `f32::NEG_INFINITY`.
        max_value: f32,
        /// Running sum of the finite values, kept in `f64`; divided by `count` for the mean.
        sum: f64,
        /// Per-bin counts for the selected rows; same length as the global histogram's `counts`.
        bins: Vec<u64>,
        /// Global histogram `p1`: values below it go to the first bin.
        p1: f32,
        /// Global histogram `p99`: values at or above it go to the last bin.
        p99: f32,
        /// Width of one middle bin, `(p99 - p1) / (num_bins - 2)`; `0.0` when there are
        /// no middle bins or `p99 <= p1`.
        middle_width: f32,
        /// Total number of bins (length of `bins`).
        num_bins: usize,
        /// Global histogram minimum, passed through unchanged to the response.
        global_min: f32,
        /// Global histogram maximum, passed through unchanged to the response.
        global_max: f32,
    },
    /// Enum: count occurrences per variant index.
    Enum { value_counts: Vec<u64> },
    /// Feature skipped: a field filter was specified and the feature's name is not in
    /// `field_set`. The row loop does nothing for it and it produces no output.
    Skip,
}

/// Compute per-feature stats (numeric histograms + enum counts) for the given rows.
///
/// Single-pass: iterates rows in the outer loop for cache-friendly row-major access.
///
/// # Arguments
/// * `matching_rows` - row indices to aggregate.
/// * `data` - property table; `data.get_feature(row, fi)` supplies each value.
/// * `feature_names` - feature names, indexed by feature index.
/// * `enum_values` - variant labels for enum features, keyed by feature index; a feature
///   absent from this map is treated as numeric.
/// * `feature_stats_data` - global per-feature stats; the histogram's `p1`, `p99`, `min`,
///   `max` and bin count define the bins used for the selection.
/// * `fields_specified` / `field_set` - when `fields_specified` is true, only features
///   whose name is in `field_set` are computed.
///
/// # Returns
/// `(numeric, enums)`. Non-finite values are ignored. Numeric features with no finite
/// value and enum features with no counted value are omitted. Enum values that are
/// negative or not a valid variant index are skipped, counted in the
/// `stats_enum_oob_total` metric and logged as errors.
///
/// A global histogram with zero bins does not panic: count/min/max/mean are still
/// computed and the returned histogram `counts` stays empty.
///
/// # Panics
/// Panics if `feature_stats_data` has no entry at the index of a numeric feature that
/// is not skipped.
#[allow(clippy::too_many_arguments)]
pub fn compute_feature_stats(
    matching_rows: &[usize],
    data: &PropertyData,
    feature_names: &[String],
    enum_values: &FxHashMap<usize, Vec<String>>,
    feature_stats_data: &[FeatureStats],
    fields_specified: bool,
    field_set: &HashSet<String>,
) -> (Vec<NumericFeatureStats>, Vec<EnumFeatureStats>) {
    // Pick the histogram bin for a finite `value`: bin 0 holds everything below `p1`,
    // the last bin everything at or above `p99`, and the bins in between split
    // [p1, p99) into equal widths. Saturating arithmetic keeps this panic-free even
    // when `num_bins` is 0; the caller then finds no slot to increment.
    fn bin_index(value: f32, p1: f32, p99: f32, middle_width: f32, num_bins: usize) -> usize {
        if value < p1 {
            0
        } else if value >= p99 {
            num_bins.saturating_sub(1)
        } else if middle_width > 0.0 {
            let middle_bin = ((value - p1) / middle_width) as usize;
            // Clamp: float rounding may land exactly on the p99 edge.
            (1 + middle_bin).min(num_bins.saturating_sub(2))
        } else {
            // No usable middle range (degenerate or NaN percentiles).
            num_bins / 2
        }
    }

    // Decide each feature's accumulator kind once, before touching any row.
    let mut accums: Vec<FeatureAccum> = feature_names
        .iter()
        .enumerate()
        .map(|(fi, feature_name)| {
            if fields_specified && !field_set.contains(feature_name.as_str()) {
                return FeatureAccum::Skip;
            }

            if let Some(ev) = enum_values.get(&fi) {
                return FeatureAccum::Enum {
                    value_counts: vec![0u64; ev.len()],
                };
            }

            let global_hist = &feature_stats_data[fi].histogram;
            let p1 = global_hist.p1;
            let p99 = global_hist.p99;
            let num_bins = global_hist.counts.len();
            // The first and last bins are the tails; only the rest share [p1, p99).
            let middle_bins = num_bins.saturating_sub(2);
            let middle_width = if middle_bins > 0 && p99 > p1 {
                (p99 - p1) / middle_bins as f32
            } else {
                0.0
            };
            FeatureAccum::Numeric {
                count: 0,
                min_value: f32::INFINITY,
                max_value: f32::NEG_INFINITY,
                sum: 0.0,
                bins: vec![0u64; num_bins],
                p1,
                p99,
                middle_width,
                num_bins,
                global_min: global_hist.min,
                global_max: global_hist.max,
            }
        })
        .collect();

    // Single pass: outer loop = rows, inner loop = features (cache-friendly row-major access)
    for &row in matching_rows {
        for (fi, accum) in accums.iter_mut().enumerate() {
            match accum {
                FeatureAccum::Skip => {}
                FeatureAccum::Enum { value_counts } => {
                    let value = data.get_feature(row, fi);
                    if value.is_finite() {
                        // Reject negatives and any out-of-range index (huge values
                        // saturate in the cast and fail the length check). A
                        // schema/data mismatch is a critical data-integrity bug —
                        // skip the row, count it, and surface as error so
                        // monitoring catches it.
                        let len = value_counts.len();
                        let idx_ok = value >= 0.0 && (value as usize) < len;
                        if idx_ok {
                            value_counts[value as usize] += 1;
                        } else {
                            counter!("stats_enum_oob_total").increment(1);
                            error!(
                                feature = feature_names[fi].as_str(),
                                value,
                                max = len,
                                "Enum index out of bounds — data/schema mismatch"
                            );
                        }
                    }
                }
                FeatureAccum::Numeric {
                    count,
                    min_value,
                    max_value,
                    sum,
                    bins,
                    p1,
                    p99,
                    middle_width,
                    num_bins,
                    ..
                } => {
                    let value = data.get_feature(row, fi);
                    if value.is_finite() {
                        *count += 1;
                        if value < *min_value {
                            *min_value = value;
                        }
                        if value > *max_value {
                            *max_value = value;
                        }
                        *sum += value as f64;

                        let bin = bin_index(value, *p1, *p99, *middle_width, *num_bins);
                        // `get_mut` instead of indexing: a zero-bin histogram has no
                        // slot to bump, and that must not abort the whole request.
                        if let Some(slot) = bins.get_mut(bin) {
                            *slot += 1;
                        }
                    }
                }
            }
        }
    }

    // Build response structs from accumulators
    let mut numeric_features = Vec::new();
    let mut enum_features_out = Vec::new();

    for (fi, accum) in accums.into_iter().enumerate() {
        match accum {
            FeatureAccum::Skip => {}
            FeatureAccum::Enum { value_counts } => {
                // Present by construction: Enum accumulators are only created for
                // indices found in `enum_values`, with one counter per label.
                let ev = &enum_values[&fi];
                let counts: HashMap<String, u64> = value_counts
                    .iter()
                    .enumerate()
                    .filter(|(_, &count)| count > 0)
                    .map(|(idx, &count)| (ev[idx].clone(), count))
                    .collect();

                if !counts.is_empty() {
                    enum_features_out.push(EnumFeatureStats {
                        name: feature_names[fi].clone(),
                        counts,
                    });
                }
            }
            FeatureAccum::Numeric {
                count,
                min_value,
                max_value,
                sum,
                bins,
                p1,
                p99,
                global_min,
                global_max,
                ..
            } => {
                if count > 0 {
                    numeric_features.push(NumericFeatureStats {
                        name: feature_names[fi].clone(),
                        count,
                        min: min_value as f64,
                        max: max_value as f64,
                        mean: sum / count as f64,
                        // Bin edges come from the global histogram; only the counts
                        // describe the selection.
                        histogram: HistogramStats {
                            min: global_min as f64,
                            max: global_max as f64,
                            p1: p1 as f64,
                            p99: p99 as f64,
                            counts: bins,
                        },
                    });
                }
            }
        }
    }

    (numeric_features, enum_features_out)
}

/// Property-weighted mean crime count per year, per crime type, over the selection.
///
/// Every matching property brings in the per-year counts of its postcode (incidents
/// near that postcode), so a postcode with many selected properties weighs more —
/// the same property-weighted-average shape used elsewhere in the right pane.
///
/// The denominators are coverage-aware. police.uk has multi-year publication gaps for
/// whole forces (e.g. Greater Manchester from 2019-07), and the pipeline emits a
/// `covered_years` calendar per postcode:
///
/// * a row counts toward a year's denominator only if its postcode's calendar lists
///   that year — only then does a missing count mean a genuine zero;
/// * a row whose postcode has no calendar at all (legacy parquet without the column)
///   counts toward every year, which restores the previous behaviour;
/// * a year that no selected row covers is left out of the result (charted as a gap,
///   not as a zero).
///
/// When `fields_specified` is true, a crime type is reported only if `field_set`
/// contains its bare name or the name with an `" (avg/yr)"` suffix. Types that end up
/// with no points are omitted. Returns an empty vector when there are no crime types
/// or no matching rows.
pub fn compute_crime_by_year(
    matching_rows: &[usize],
    data: &PropertyData,
    crime_by_year: &CrimeByYearData,
    fields_specified: bool,
    field_set: &HashSet<String>,
) -> Vec<CrimeYearStats> {
    let type_count = crime_by_year.crime_types.len();
    if type_count == 0 || matching_rows.is_empty() {
        return Vec::new();
    }

    // Numerators: one year -> summed count map per crime type.
    let mut sums_by_type: Vec<FxHashMap<i32, f64>> =
        std::iter::repeat_with(FxHashMap::default)
            .take(type_count)
            .collect();
    // Denominator parts: rows whose calendar lists the year, plus rows that have no
    // calendar whatsoever (legacy data, treated as covered in every year).
    let mut rows_covering_year: FxHashMap<i32, u32> = FxHashMap::default();
    let mut rows_without_calendar: u32 = 0;

    for &row in matching_rows {
        let postcode = data.postcode(row);

        if let Some(calendar) = crime_by_year.covered_years_by_postcode.get(postcode) {
            // An empty calendar (force gap across the whole window, or unusable
            // boundary geometry) contributes to no year: that postcode's crime
            // picture is unknown and must not dilute any mean.
            for &year in calendar {
                *rows_covering_year.entry(year).or_insert(0) += 1;
            }
        } else {
            rows_without_calendar += 1;
        }

        // No series for a type means no recorded incidents of that type: nothing is
        // added to the sums while the covered years still sit in the denominator,
        // i.e. a genuine zero. Uncovered years are handled by the denominators.
        let series_list = match crime_by_year.series_by_postcode.get(postcode) {
            Some(list) => list,
            None => continue,
        };
        for series in series_list {
            let type_sums = &mut sums_by_type[series.type_idx as usize];
            for point in &series.points {
                *type_sums.entry(point.year).or_insert(0.0) += point.count as f64;
            }
        }
    }

    crime_by_year
        .crime_types
        .iter()
        .enumerate()
        .filter_map(|(type_idx, name)| {
            // The by-year side table stores bare type names (e.g. "Burglary") while
            // configured feature names end in " (avg/yr)"; accept either spelling so
            // callers can pass the names they already have.
            if fields_specified {
                let with_suffix = format!("{name} (avg/yr)");
                let requested = field_set.contains(name.as_str())
                    || field_set.contains(with_suffix.as_str());
                if !requested {
                    return None;
                }
            }

            // A type without a year list yields nothing, as does an empty list.
            let years = crime_by_year.years_by_type.get(type_idx)?;
            let type_sums = &sums_by_type[type_idx];

            let mut points = Vec::with_capacity(years.len());
            for &year in years {
                let denom =
                    rows_without_calendar + rows_covering_year.get(&year).copied().unwrap_or(0);
                if denom == 0 {
                    // No selected postcode has published data for this year.
                    continue;
                }
                let total = type_sums.get(&year).copied().unwrap_or(0.0);
                points.push(CrimeYearPoint {
                    year,
                    count: (total / denom as f64) as f32,
                });
            }

            if points.is_empty() {
                None
            } else {
                Some(CrimeYearStats {
                    name: name.clone(),
                    points,
                })
            }
        })
        .collect()
}

/// Compute numeric stats (count, min, max, mean, histogram) for each POI metric over
/// the given rows.
///
/// # Arguments
/// * `matching_rows` - property row indices to aggregate.
/// * `poi_metrics` - metric names, global per-metric stats, and the per-row values
///   (read through `get_for_property_row`).
/// * `fields_specified` / `field_set` - when `fields_specified` is true, only metrics
///   whose name is in `field_set` are computed.
///
/// # Returns
/// One entry per metric that has at least one finite value among the rows, in
/// `poi_metrics.feature_names` order. Non-finite values are ignored. The histogram's
/// `min`, `max`, `p1` and `p99` are copied from the metric's global histogram; only
/// `counts` reflects the selection.
///
/// A global histogram with zero bins does not panic: count/min/max/mean are still
/// computed and the returned histogram `counts` stays empty.
///
/// # Panics
/// Panics if `poi_metrics.feature_stats` has no entry at the index of a metric that is
/// not filtered out.
pub fn compute_poi_feature_stats(
    matching_rows: &[usize],
    poi_metrics: &PostcodePoiMetrics,
    fields_specified: bool,
    field_set: &HashSet<String>,
) -> Vec<NumericFeatureStats> {
    // Pick the histogram bin for a finite `value`: bin 0 holds everything below `p1`,
    // the last bin everything at or above `p99`, and the bins in between split
    // [p1, p99) into equal widths. Saturating arithmetic keeps this panic-free even
    // when `num_bins` is 0; the caller then finds no slot to increment.
    fn bin_index(value: f32, p1: f32, p99: f32, middle_width: f32, num_bins: usize) -> usize {
        if value < p1 {
            0
        } else if value >= p99 {
            num_bins.saturating_sub(1)
        } else if middle_width > 0.0 {
            let middle_bin = ((value - p1) / middle_width) as usize;
            // Clamp: float rounding may land exactly on the p99 edge.
            (1 + middle_bin).min(num_bins.saturating_sub(2))
        } else {
            // No usable middle range (degenerate or NaN percentiles).
            num_bins / 2
        }
    }

    let mut out = Vec::new();
    for (metric_idx, name) in poi_metrics.feature_names.iter().enumerate() {
        if fields_specified && !field_set.contains(name.as_str()) {
            continue;
        }

        let global_hist = &poi_metrics.feature_stats[metric_idx].histogram;
        let p1 = global_hist.p1;
        let p99 = global_hist.p99;
        let num_bins = global_hist.counts.len();
        // The first and last bins are the tails; only the rest share [p1, p99).
        let middle_bins = num_bins.saturating_sub(2);
        let middle_width = if middle_bins > 0 && p99 > p1 {
            (p99 - p1) / middle_bins as f32
        } else {
            0.0
        };

        let mut count = 0usize;
        let mut min_value = f32::INFINITY;
        let mut max_value = f32::NEG_INFINITY;
        let mut sum = 0.0f64;
        let mut bins = vec![0u64; num_bins];

        for &row in matching_rows {
            let value = poi_metrics.get_for_property_row(row, metric_idx);
            if !value.is_finite() {
                continue;
            }
            count += 1;
            if value < min_value {
                min_value = value;
            }
            if value > max_value {
                max_value = value;
            }
            sum += value as f64;

            let bin = bin_index(value, p1, p99, middle_width, num_bins);
            // `get_mut` instead of indexing: a zero-bin histogram has no slot to
            // bump, and that must not abort the whole request.
            if let Some(slot) = bins.get_mut(bin) {
                *slot += 1;
            }
        }

        if count > 0 {
            out.push(NumericFeatureStats {
                name: name.clone(),
                count,
                min: min_value as f64,
                max: max_value as f64,
                mean: sum / count as f64,
                histogram: HistogramStats {
                    min: global_hist.min as f64,
                    max: global_hist.max as f64,
                    p1: p1 as f64,
                    p99: p99 as f64,
                    counts: bins,
                },
            });
        }
    }

    out
}