import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pspipe_utils import covariance, pspipe_list
from pspy import pspy_utils, so_cov, so_dict
from scipy import stats
from tqdm.auto import tqdm

sns.set_theme(style="ticks", rc={f"axes.spines.{spine}": False for spine in ["top", "right"]})


class Bunch:
    def __init__(self, **kwds):
        self.update(**kwds)

    def update(self, **kwds):
        self.__dict__.update(kwds)

    def setdefault(self, key, default=None):
        return self.__dict__.setdefault(key, default)

    def __repr__(self):
        return self.__dict__.__repr__()

    def __str__(self):
        return self.__dict__.__str__()

    def get(self, key):
        return self.__dict__.get(key)


product_dir = "/pscratch/sd/x/xgarrido/analysis/act/products/dr6/mnms"

get_path = lambda name: os.path.join(product_dir, name)
cov_dir = get_path("covariances")
sim_spec_dir = get_path("sim_spectra")
mcm_dir = get_path("mcms")
bestfit_dir = get_path("best_fits")

get_covariance = lambda name: np.load(os.path.join(cov_dir, f"x_ar_{name}_cov_TT_TE_ET_EE.npy"))

cases = {
    "analytic": Bunch(cov=get_covariance("analytic")),
    "mc": Bunch(cov=get_covariance("mc")),
}
cases["skew"] = Bunch(
    cov=so_cov.corr2cov(np.load("skew_corr/skew_corr_clean.npy"), cases["mc"].cov.diagonal())
)

cov_copy = cases["analytic"].cov.copy()
np.fill_diagonal(cov_copy, cases["mc"].cov.diagonal())
cases["mc_analytic"] = Bunch(cov=cov_copy)

for size in range(200, 1500, 200):
    cases[f"skew size {size}"] = Bunch(
        cov=so_cov.corr2cov(
            np.load(f"skew_corr/skew_corr_clean_size_{size}.npy"),
            np.load(f"skew_corr/x_ar_mc_cov_size_{size}.npy").diagonal(),
        )
    )

for size in range(200, 900, 200):
    nkeeps = list(range(5, 35, 5)) + list(range(40, 60, 10))
    for nkeep in nkeeps:
        cases[f"size {size} - nmode {nkeep}"] = Bunch(
            cov=so_cov.corr2cov(
                np.load(f"skew_corr/skew_corr_clean_size_{size}_nkeep_{nkeep}.npy"),
                np.load(f"skew_corr/x_ar_mc_cov_size_{size}.npy").diagonal(),
            )
        )


np.abs(cases["mc"].cov.diagonal() - cases["skew"].cov.diagonal()).max()

9.313225746154785e-10


for k in range(1, 4):
    fig, ax = plt.subplots()
    for name, color in {"mc": "0.6", "analytic": "k", "skew": "r"}.items():
        ax.plot(np.diag(so_cov.cov2corr(cases[name].cov), k=k), label=name, color=color)
    ax.set(xlim=[0, 300], ylim=[-0.1, 0.05])
    ax.legend(title=f"{k}-diagonal");


d = so_dict.so_dict()
d.read_from_file(get_path("global_dr6_v4_updated.dict"))

binning_file = d["binning_file"]
lmax = d["lmax"]

spec_name_list = pspipe_list.get_spec_name_list(d, delimiter="_")
spectra_order = ["TT", "TE", "ET", "EE"]

spectra_cuts = {
    "dr6_pa4_f220": dict(T=[1000, lmax], P=[lmax, lmax]),
    "dr6_pa5_f150": dict(T=[800, lmax], P=[500, lmax]),
    "dr6_pa6_f150": dict(T=[600, lmax], P=[500, lmax]),
    "dr6_pa5_f090": dict(T=[1000, lmax], P=[500, lmax]),
    "dr6_pa6_f090": dict(T=[1000, lmax], P=[500, lmax]),
}

theory_vec = covariance.read_x_ar_theory_vec(
    bestfit_dir, mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)

bin_low, bin_high, *_ = pspy_utils.read_binning_file(binning_file, lmax)
indices = covariance.get_indices(
    bin_low,
    bin_high,
    spec_name_list,
    spectra_cuts=spectra_cuts,
    spectra_order=spectra_order,
)
for case, bunch in cases.items():
    bunch.update(invcov=np.linalg.inv(bunch.cov[np.ix_(indices, indices)]))

for i in (pbar := tqdm(range(1000))):
    pbar.set_description(f"Simulation n°{i}")
    data_vec = covariance.read_x_ar_spectra_vec(
        sim_spec_dir,
        spec_name_list,
        f"cross_{i:05d}",
        spectra_order=spectra_order,
        type=d["type"],
    )

    for case, bunch in cases.items():
        res = data_vec[indices] - theory_vec[indices]
        chi2 = res @ bunch.invcov @ res
        bunch.setdefault("chi2", []).append(chi2)

  0%|          | 0/1000 [00:00<?, ?it/s]


ndof = len(indices)

chi2 = {
    k: v.chi2 for k, v in cases.items() if k in ["analytic", "mc_analytic", "skew", "skew full"]
}

fig, ax = plt.subplots()
sns.histplot(data=chi2, fill=True, kde=False, ax=ax, stat="density", common_norm=False)
ax.axvline(ndof, color="gray", ls="dashed")
ax.set(xlabel=r"$\chi^2$")

x = np.linspace(0.8 * ndof, 1.2 * ndof, 300)
y_pdf = stats.chi2.pdf(x, ndof)
ax.plot(x, y_pdf, color="gray");


print(ndof)
pd.DataFrame(chi2)

1694


chi2 = {k: v.chi2 for k, v in cases.items() if "skew size" in k}

fig, ax = plt.subplots()
sns.kdeplot(data=chi2, ax=ax, common_norm=True)
ax.axvline(ndof, color="gray")
ax.set(xlabel=r"$\chi^2$");


fig, ax = plt.subplots()
table = []
for name, bunch in cases.items():
    if "skew size" not in name:
        continue
    eigenvalues, eigenvectors = np.linalg.eig(bunch.cov)
    ax.semilogy(eigenvalues)
    table += [(name.split("size")[-1], np.min(eigenvalues), np.max(eigenvalues))]


pd.DataFrame(table, columns=["nsim", "min λ", "max λ"])


for size in range(200, 900, 200):
    keys = [f"size {size} - nmode {nmode}" for nmode in nkeeps]
    chi2 = {k: v.chi2 for k, v in cases.items() if k in keys}

    fig, ax = plt.subplots()
    sns.kdeplot(data=cases["skew"].chi2, ax=ax, common_norm=False, color="black")
    sns.kdeplot(data=chi2, ax=ax, common_norm=False)
    ax.axvline(ndof, color="gray", ls="dashed")
    # ax.plot(x, y_pdf, color="gray")
    ax.set(xlabel=r"$\chi^2$");


analytic = dict(cov=np.load(os.path.join(cov_dir, "x_ar_analytic_cov.npy")))
full_skew = dict(
    cov=so_cov.corr2cov(
        np.load("skew_corr/skew_corr_clean_full.npy"),
        np.load(os.path.join(cov_dir, "x_ar_mc_cov.npy")).diagonal(),
    )
)

spectra_order = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
combinations = [spectra_order, ["TT", "TE", "ET", "EE"]] + spectra_order
for c in combinations:
    name = ",".join(c) if isinstance(c, list) else c
    indices = covariance.get_indices(
        bin_low,
        bin_high,
        spec_name_list,
        spectra_cuts=spectra_cuts,
        spectra_order=spectra_order,
        selected_spectra=c,
    )
    invcov = np.linalg.inv(full_skew.get("cov")[np.ix_(indices, indices)])
    full_skew.setdefault("modes", {}).setdefault(name, {}).update(indices=indices, invcov=invcov)
    invcov = np.linalg.inv(analytic.get("cov")[np.ix_(indices, indices)])
    analytic.setdefault("modes", {}).setdefault(name, {}).update(indices=indices, invcov=invcov)

theory_vec = covariance.read_x_ar_theory_vec(
    bestfit_dir, mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)
for i in (pbar := tqdm(range(1000))):
    pbar.set_description(f"Simulation n°{i}")
    data_vec = covariance.read_x_ar_spectra_vec(
        sim_spec_dir,
        spec_name_list,
        f"cross_{i:05d}",
        spectra_order=spectra_order,
        type=d["type"],
    )
    for mode in full_skew.get("modes").keys():
        for v in [full_skew.get("modes")[mode], analytic.get("modes")[mode]]:
            invcov, indices = v.get("invcov"), v.get("indices")
            res = data_vec[indices] - theory_vec[indices]
            chi2 = res @ invcov @ res
            v.setdefault("chi2", []).append(chi2)

  0%|          | 0/1000 [00:00<?, ?it/s]


chi2 = {k: v.get("chi2") for k, v in full_skew.get("modes").items()}
data = pd.melt(pd.DataFrame(chi2))

g = sns.displot(
    data=data,
    x="value",
    col="variable",
    kde=False,
    legend=False,
    stat="density",
    common_norm=False,
    col_wrap=3,
    facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")

for ax in g.axes.flatten():
    mode = ax.title.get_text()
    ndof = len(full_skew["modes"][mode]["indices"])
    ax.axvline(ndof, color="gray", ls="dashed")

    xlim = (0.7 * ndof, 1.3 * ndof)
    x = np.linspace(*xlim, 300)
    y_pdf = stats.chi2.pdf(x, ndof)
    ax.plot(x, y_pdf, color="gray")
    ax.set_xlim(xlim)


cov = np.load(
    "/global/cfs/cdirs/act/data/zatkins/data/simonsobs/PSpipe/project/ana_cov_comp/cov_dr6_v4_20231128/covariances/x_ar_analytic_cov.npy"
)
zz_cases = {"zz": dict(cov=cov.copy())}

# np.fill_diagonal(cov, np.load(os.path.join(cov_dir, "x_ar_mc_cov.npy")).diagonal())
# zz_cases["mc diagonal"] = dict(cov=cov)

spectra_order = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
combinations = [spectra_order, ["TT", "TE", "ET", "EE"]] + spectra_order
for c in combinations:
    indices = covariance.get_indices(
        bin_low,
        bin_high,
        spec_name_list,
        spectra_cuts=spectra_cuts,
        spectra_order=spectra_order,
        selected_spectra=c,
    )
    for case in zz_cases.values():
        invcov = np.linalg.inv(case.get("cov")[np.ix_(indices, indices)])
        case.setdefault("modes", {}).setdefault(
            ",".join(c) if isinstance(c, list) else c, {}
        ).update(indices=indices, invcov=invcov)


theory_vec = covariance.read_x_ar_theory_vec(
    "./best_fits", mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)
for i in (pbar := tqdm(range(1000))):
    pbar.set_description(f"Simulation n°{i}")
    data_vec = covariance.read_x_ar_spectra_vec(
        "/global/cfs/cdirs/act/data/tlouis/dr6v4/sim_spectra",
        spec_name_list,
        f"cross_{i:05d}",
        spectra_order=spectra_order,
        type=d["type"],
    )
    for case in zz_cases.values():
        for k, v in case.get("modes").items():
            invcov, indices = v.get("invcov"), v.get("indices")
            res = data_vec[indices] - theory_vec[indices]
            chi2 = res @ invcov @ res
            v.setdefault("chi2", []).append(chi2)

  0%|          | 0/1000 [00:00<?, ?it/s]


g = sns.displot(
    data=pd.concat(
        [
            pd.melt(pd.DataFrame({k: v.get("chi2") for k, v in case.get("modes").items()})).assign(
                kind=name
            )
            for name, case in zz_cases.items()
        ]
    ).reset_index(),
    x="value",
    col="variable",
    kde=False,
    hue="kind",
    stat="density",
    common_norm=False,
    col_wrap=3,
    facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")

for ax in g.axes.flatten():
    mode = ax.title.get_text()
    ndof = len(zz_cases["zz"]["modes"][mode]["indices"])
    ax.axvline(ndof, color="gray", ls="dashed")

    xlim = (0.7 * ndof, 1.3 * ndof)
    x = np.linspace(*xlim, 300)
    y_pdf = stats.chi2.pdf(x, ndof)
    ax.plot(x, y_pdf, color="gray")
    ax.set_xlim(xlim)


dfs = [
    pd.melt(pd.DataFrame({k: v.get("chi2") for k, v in case.get("modes").items()})).assign(
        kind=kind
    )
    for kind, case in {
        "skew": full_skew,
        "analytic": analytic,
        "zz": zz_cases["zz"],
    }.items()
]

g = sns.displot(
    data=pd.concat(dfs).reset_index(),
    x="value",
    col="variable",
    kde=False,
    hue="kind",
    stat="density",
    common_norm=False,
    col_wrap=4,
    facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")

for ax in g.axes.flatten():
    mode = ax.title.get_text()
    ndof = len(zz_cases["zz"]["modes"][mode]["indices"])
    ax.axvline(ndof, color="gray", ls="dashed")

    xlim = (0.7 * ndof, 1.3 * ndof)
    x = np.linspace(*xlim, 300)
    y_pdf = stats.chi2.pdf(x, ndof)
    ax.plot(x, y_pdf, color="gray")
    ax.set_xlim(xlim)

	analytic	skew	mc_analytic
0	2016.016694	1753.555221	1613.921295
1	1885.027846	1640.620832	1529.404014
2	2011.884156	1740.485435	1608.935671
3	1989.356982	1742.101094	1588.645976
4	2006.735719	1734.637519	1592.820116
...	...	...	...
995	1981.959209	1722.986240	1593.821487
996	1942.864699	1690.518606	1551.150381
997	1863.957996	1624.116667	1510.290705
998	2065.007504	1797.917773	1634.570015
999	1928.715904	1690.392101	1558.035023

	nsim	min λ	max λ
0	200	-36.117845	6.414579e+06
1	400	-4.096206	6.350186e+06
2	600	-0.804928	6.281688e+06
3	800	-0.142240	6.202395e+06
4	1000	-0.002993	6.098695e+06
5	1200	0.002411	6.192878e+06
6	1400	0.002592	6.072928e+06

Loading covariances¶

Computing $\chi^2$ distributions¶

$\chi^2$ distributions for a reduced set of MC simulations¶

Skewing the full matrix with T, E, B mode¶

Analytic cov. by Zach & Zack¶

Summary plot¶