"""
MOSOF — reference implementation
================================

Multi-Objective Sensor Optimisation Framework: pick a subset of candidate
sensors that simultaneously (a) maximises diagnostic performance,
(b) minimises suite cost, and (c) maximises suite reliability.

Pedagogical reimplementation of the framework described in:
  Suslu, Ali, Jennions (2026). MOSOF with NDCI: A Cross-Subsystem
  Evaluation. Sensors 26(1), 160. doi:10.3390/s26010160

The math (NDCI-weighted coverage, series-MTBF aggregation, three-axis
Pareto extraction) follows the published paper. The implementation is a
fresh write against pymoo's NSGA-II driver — short enough to read in
one sitting.

Run:
    python mosof_demo.py

Outputs:
    pareto_front.csv  — one row per non-dominated suite
    Console summary   — front size, knee composition, axis ranges
"""
from __future__ import annotations

import csv
import numpy as np
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.problem import ElementwiseProblem
from pymoo.operators.crossover.hux import HalfUniformCrossover
from pymoo.operators.mutation.bitflip import BitflipMutation
from pymoo.operators.sampling.rnd import BinaryRandomSampling
from pymoo.optimize import minimize


# ── Candidate sensor pool ────────────────────────────────────────────────
# Parameters chosen to match the order-of-magnitude ranges reported in
# Section 4.3 of the thesis (sensor-family MTBF medians, unit costs,
# per-subsystem coverage). Specific values here are synthetic — the
# published per-sensor table is in the thesis appendix.

SUBSYSTEMS = ["Engine", "Fuel", "EPS", "ECS"]
RNG = np.random.default_rng(seed=42)


def make_candidate_pool(n_per_subsystem: int = 8) -> dict:
    """Build a candidate sensor pool with realistic-shaped attributes."""
    rows = []
    for sub_i, subsystem in enumerate(SUBSYSTEMS):
        # Cost per sensor in the family — kUSD.
        cost_range = {"Engine": (3.0, 8.0), "Fuel": (1.5, 4.5),
                      "EPS": (2.0, 5.5), "ECS": (1.0, 3.5)}[subsystem]
        # MTBF per sensor — kilohours. Aerospace sensors are highly reliable.
        mtbf_range = {"Engine": (180, 480), "Fuel": (220, 520),
                      "EPS": (300, 700), "ECS": (240, 600)}[subsystem]
        # NDCI contribution — info-theoretic diagnostic value (normalised).
        ndci_range = (0.05, 0.18)
        for j in range(n_per_subsystem):
            rows.append({
                "id": f"{subsystem[:3].upper()}-{j+1:02d}",
                "subsystem": subsystem,
                "cost_kUSD": float(RNG.uniform(*cost_range)),
                "mtbf_kh": float(RNG.uniform(*mtbf_range)),
                "ndci": float(RNG.uniform(*ndci_range)),
            })
    return {"sensors": rows, "n": len(rows)}


# ── Objectives ───────────────────────────────────────────────────────────

def evaluate_suite(mask: np.ndarray, pool: dict) -> tuple[float, float, float]:
    """Compute (performance, cost, MTBF) for a binary selection mask.

    performance — NDCI-weighted coverage, normalised to [0, 1] against
                  the pool's total potential NDCI.
    cost        — sum of unit costs in kUSD (lower is better).
    mtbf        — series-equivalent MTBF in kilohours (higher is better).
    """
    sensors = pool["sensors"]
    selected_idx = np.flatnonzero(mask.astype(bool))
    if selected_idx.size == 0:
        return 0.0, 0.0, 0.0

    sel = [sensors[i] for i in selected_idx]

    # NDCI-weighted coverage, normalised by the full-pool total NDCI.
    total_pool_ndci = sum(s["ndci"] for s in sensors)
    suite_ndci = sum(s["ndci"] for s in sel)
    # Sub-additive saturation: extra sensors give diminishing returns.
    perf = 1.0 - np.exp(-2.5 * suite_ndci / total_pool_ndci)

    cost = sum(s["cost_kUSD"] for s in sel)

    # Series-equivalent MTBF — adding sensors reduces system MTBF.
    inv_mtbf = sum(1.0 / s["mtbf_kh"] for s in sel)
    mtbf = 1.0 / inv_mtbf if inv_mtbf > 0 else 0.0

    return float(perf), float(cost), float(mtbf)


class MOSOFProblem(ElementwiseProblem):
    """Three-objective MOSOF problem in binary decision space."""

    def __init__(self, pool: dict):
        n = pool["n"]
        super().__init__(
            n_var=n, n_obj=3, n_ieq_constr=1,
            xl=0, xu=1, vtype=int,
        )
        self.pool = pool

    def _evaluate(self, x, out, *args, **kwargs):
        perf, cost, mtbf = evaluate_suite(x, self.pool)
        # NSGA-II minimises; negate the maximise objectives.
        out["F"] = [-perf, cost, -mtbf]
        # Constraint: suite must have at least three sensors to be useful.
        out["G"] = [3 - int(x.sum())]


# ── Pareto-front utilities ───────────────────────────────────────────────

def knee_by_perpendicular(front: np.ndarray) -> int:
    """Find the knee as the point with max perpendicular distance to the
    line joining the two extreme objectives (perf vs cost projection)."""
    perf = front[:, 0]  # already negated for minimise
    cost = front[:, 1]
    # Normalise both axes to [0, 1] for fair distance.
    perf_n = (perf - perf.min()) / max(perf.max() - perf.min(), 1e-9)
    cost_n = (cost - cost.min()) / max(cost.max() - cost.min(), 1e-9)
    p1 = np.array([perf_n[np.argmin(perf_n)], cost_n[np.argmin(perf_n)]])
    p2 = np.array([perf_n[np.argmax(perf_n)], cost_n[np.argmax(perf_n)]])
    line = p2 - p1
    line_len = np.linalg.norm(line) or 1e-9
    dists = []
    for i in range(len(front)):
        pt = np.array([perf_n[i], cost_n[i]])
        # 2-D perpendicular distance: |a×b| / |a| where a=line, b=pt-p1.
        a, b = line, pt - p1
        d = (a[0] * b[1] - a[1] * b[0]) / line_len
        dists.append(abs(d))
    return int(np.argmax(dists))


# ── Driver ───────────────────────────────────────────────────────────────

def main():
    pool = make_candidate_pool(n_per_subsystem=8)
    print(f"Candidate pool: {pool['n']} sensors across "
          f"{len(SUBSYSTEMS)} subsystems\n")

    problem = MOSOFProblem(pool)
    algorithm = NSGA2(
        pop_size=100,
        sampling=BinaryRandomSampling(),
        crossover=HalfUniformCrossover(),
        mutation=BitflipMutation(prob=1.0 / pool["n"]),
        eliminate_duplicates=True,
    )
    result = minimize(problem, algorithm, ("n_gen", 200), seed=1, verbose=False)

    # Decode the Pareto-optimal suites.
    front = result.F  # [-perf, cost, -mtbf]
    masks = result.X
    n_front = len(front)
    print(f"Pareto front: {n_front} non-dominated suites\n")

    # Identify the knee on the perf-cost projection.
    knee = knee_by_perpendicular(front)
    knee_mask = masks[knee].astype(bool)
    knee_perf, knee_cost, knee_mtbf = evaluate_suite(knee_mask, pool)
    knee_sensors = [pool["sensors"][i] for i in np.flatnonzero(knee_mask)]
    knee_n = sum(1 for _ in knee_sensors)

    print(f"Knee suite:")
    print(f"  size       = {knee_n} sensors")
    print(f"  performance= {knee_perf:.3f}  (normalised NDCI coverage)")
    print(f"  cost       = ${knee_cost:.1f}k")
    print(f"  MTBF       = {knee_mtbf:.0f} kh\n")

    print(f"Knee composition:")
    by_sub = {s: 0 for s in SUBSYSTEMS}
    for s in knee_sensors:
        by_sub[s["subsystem"]] += 1
    for sub, n in by_sub.items():
        print(f"  {sub:8s} {n}")

    # Write the full Pareto front for the website's data pipeline.
    with open("pareto_front.csv", "w", newline="") as fh:
        w = csv.writer(fh)
        w.writerow(["suite_idx", "n_sensors", "performance", "cost_kUSD",
                    "mtbf_kh", "is_knee"])
        for i, (f, x) in enumerate(zip(front, masks)):
            xb = x.astype(bool)
            perf, cost, mtbf = evaluate_suite(xb, pool)
            w.writerow([i, int(xb.sum()), f"{perf:.4f}", f"{cost:.2f}",
                        f"{mtbf:.1f}", "yes" if i == knee else "no"])
    print(f"\nWrote pareto_front.csv ({n_front} suites).")


if __name__ == "__main__":
    main()