import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pspipe_utils import covariance, pspipe_list
from pspy import pspy_utils, so_cov, so_dict
from scipy import stats
from tqdm.auto import tqdm

sns.set_theme(style="ticks", rc={f"axes.spines.{spine}": False for spine in ["top", "right"]})
class Bunch:
    def __init__(self, **kwds):
        self.update(**kwds)

    def update(self, **kwds):
        self.__dict__.update(kwds)

    def setdefault(self, key, default=None):
        return self.__dict__.setdefault(key, default)

    def __repr__(self):
        return self.__dict__.__repr__()

    def __str__(self):
        return self.__dict__.__str__()

    def get(self, key):
        return self.__dict__.get(key)

Loading covariances¶

product_dir = "/pscratch/sd/x/xgarrido/analysis/act/products/dr6/mnms"

get_path = lambda name: os.path.join(product_dir, name)
cov_dir = get_path("covariances")
sim_spec_dir = get_path("sim_spectra")
mcm_dir = get_path("mcms")
bestfit_dir = get_path("best_fits")

get_covariance = lambda name: np.load(os.path.join(cov_dir, f"x_ar_{name}_cov_TT_TE_ET_EE.npy"))

cases = {
    "analytic": Bunch(cov=get_covariance("analytic")),
    "mc": Bunch(cov=get_covariance("mc")),
}
cases["skew"] = Bunch(
    cov=so_cov.corr2cov(np.load("skew_corr/skew_corr_clean.npy"), cases["mc"].cov.diagonal())
)

cov_copy = cases["analytic"].cov.copy()
np.fill_diagonal(cov_copy, cases["mc"].cov.diagonal())
cases["mc_analytic"] = Bunch(cov=cov_copy)

for size in range(200, 1500, 200):
    cases[f"skew size {size}"] = Bunch(
        cov=so_cov.corr2cov(
            np.load(f"skew_corr/skew_corr_clean_size_{size}.npy"),
            np.load(f"skew_corr/x_ar_mc_cov_size_{size}.npy").diagonal(),
        )
    )

for size in range(200, 900, 200):
    nkeeps = list(range(5, 35, 5)) + list(range(40, 60, 10))
    for nkeep in nkeeps:
        cases[f"size {size} - nmode {nkeep}"] = Bunch(
            cov=so_cov.corr2cov(
                np.load(f"skew_corr/skew_corr_clean_size_{size}_nkeep_{nkeep}.npy"),
                np.load(f"skew_corr/x_ar_mc_cov_size_{size}.npy").diagonal(),
            )
        )

Let's check the diagonals of the different skew matrices

np.abs(cases["mc"].cov.diagonal() - cases["skew"].cov.diagonal()).max()
Out[4]:
9.313225746154785e-10
for k in range(1, 4):
    fig, ax = plt.subplots()
    for name, color in {"mc": "0.6", "analytic": "k", "skew": "r"}.items():
        ax.plot(np.diag(so_cov.cov2corr(cases[name].cov), k=k), label=name, color=color)
    ax.set(xlim=[0, 300], ylim=[-0.1, 0.05])
    ax.legend(title=f"{k}-diagonal");
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Computing $\chi^2$ distributions¶

We use the multipole cuts used in the analysis of the spectra

d = so_dict.so_dict()
d.read_from_file(get_path("global_dr6_v4_updated.dict"))

binning_file = d["binning_file"]
lmax = d["lmax"]

spec_name_list = pspipe_list.get_spec_name_list(d, delimiter="_")
spectra_order = ["TT", "TE", "ET", "EE"]

spectra_cuts = {
    "dr6_pa4_f220": dict(T=[1000, lmax], P=[lmax, lmax]),
    "dr6_pa5_f150": dict(T=[800, lmax], P=[500, lmax]),
    "dr6_pa6_f150": dict(T=[600, lmax], P=[500, lmax]),
    "dr6_pa5_f090": dict(T=[1000, lmax], P=[500, lmax]),
    "dr6_pa6_f090": dict(T=[1000, lmax], P=[500, lmax]),
}

theory_vec = covariance.read_x_ar_theory_vec(
    bestfit_dir, mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)

bin_low, bin_high, *_ = pspy_utils.read_binning_file(binning_file, lmax)
indices = covariance.get_indices(
    bin_low,
    bin_high,
    spec_name_list,
    spectra_cuts=spectra_cuts,
    spectra_order=spectra_order,
)
for case, bunch in cases.items():
    bunch.update(invcov=np.linalg.inv(bunch.cov[np.ix_(indices, indices)]))

for i in (pbar := tqdm(range(1000))):
    pbar.set_description(f"Simulation n°{i}")
    data_vec = covariance.read_x_ar_spectra_vec(
        sim_spec_dir,
        spec_name_list,
        f"cross_{i:05d}",
        spectra_order=spectra_order,
        type=d["type"],
    )

    for case, bunch in cases.items():
        res = data_vec[indices] - theory_vec[indices]
        chi2 = res @ bunch.invcov @ res
        bunch.setdefault("chi2", []).append(chi2)
  0%|          | 0/1000 [00:00<?, ?it/s]
ndof = len(indices)

chi2 = {
    k: v.chi2 for k, v in cases.items() if k in ["analytic", "mc_analytic", "skew", "skew full"]
}

fig, ax = plt.subplots()
sns.histplot(data=chi2, fill=True, kde=False, ax=ax, stat="density", common_norm=False)
ax.axvline(ndof, color="gray", ls="dashed")
ax.set(xlabel=r"$\chi^2$")

x = np.linspace(0.8 * ndof, 1.2 * ndof, 300)
y_pdf = stats.chi2.pdf(x, ndof)
ax.plot(x, y_pdf, color="gray");
No description has been provided for this image
print(ndof)
pd.DataFrame(chi2)
1694
Out[8]:
analytic skew mc_analytic
0 2016.016694 1753.555221 1613.921295
1 1885.027846 1640.620832 1529.404014
2 2011.884156 1740.485435 1608.935671
3 1989.356982 1742.101094 1588.645976
4 2006.735719 1734.637519 1592.820116
... ... ... ...
995 1981.959209 1722.986240 1593.821487
996 1942.864699 1690.518606 1551.150381
997 1863.957996 1624.116667 1510.290705
998 2065.007504 1797.917773 1634.570015
999 1928.715904 1690.392101 1558.035023

1000 rows × 3 columns

$\chi^2$ distributions for a reduced set of MC simulations¶

Now we check the $\chi^2$ distributions given the amount of MC simulation generated

chi2 = {k: v.chi2 for k, v in cases.items() if "skew size" in k}

fig, ax = plt.subplots()
sns.kdeplot(data=chi2, ax=ax, common_norm=True)
ax.axvline(ndof, color="gray")
ax.set(xlabel=r"$\chi^2$");
No description has been provided for this image

We can also have a look to the eigenvalues

fig, ax = plt.subplots()
table = []
for name, bunch in cases.items():
    if "skew size" not in name:
        continue
    eigenvalues, eigenvectors = np.linalg.eig(bunch.cov)
    ax.semilogy(eigenvalues)
    table += [(name.split("size")[-1], np.min(eigenvalues), np.max(eigenvalues))]
No description has been provided for this image
pd.DataFrame(table, columns=["nsim", "min λ", "max λ"])
Out[11]:
nsim min λ max λ
0 200 -36.117845 6.414579e+06
1 400 -4.096206 6.350186e+06
2 600 -0.804928 6.281688e+06
3 800 -0.142240 6.202395e+06
4 1000 -0.002993 6.098695e+06
5 1200 0.002411 6.192878e+06
6 1400 0.002592 6.072928e+06

Let's also play with the number of selected modes. The idea is can we recover a correct $\chi^2$ distributions for a limited set of MC simulations by adjusting the number of mode when doing the SVD reduction.

for size in range(200, 900, 200):
    keys = [f"size {size} - nmode {nmode}" for nmode in nkeeps]
    chi2 = {k: v.chi2 for k, v in cases.items() if k in keys}

    fig, ax = plt.subplots()
    sns.kdeplot(data=cases["skew"].chi2, ax=ax, common_norm=False, color="black")
    sns.kdeplot(data=chi2, ax=ax, common_norm=False)
    ax.axvline(ndof, color="gray", ls="dashed")
    # ax.plot(x, y_pdf, color="gray")
    ax.set(xlabel=r"$\chi^2$");
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Skewing the full matrix with T, E, B mode¶

analytic = dict(cov=np.load(os.path.join(cov_dir, "x_ar_analytic_cov.npy")))
full_skew = dict(
    cov=so_cov.corr2cov(
        np.load("skew_corr/skew_corr_clean_full.npy"),
        np.load(os.path.join(cov_dir, "x_ar_mc_cov.npy")).diagonal(),
    )
)

spectra_order = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
combinations = [spectra_order, ["TT", "TE", "ET", "EE"]] + spectra_order
for c in combinations:
    name = ",".join(c) if isinstance(c, list) else c
    indices = covariance.get_indices(
        bin_low,
        bin_high,
        spec_name_list,
        spectra_cuts=spectra_cuts,
        spectra_order=spectra_order,
        selected_spectra=c,
    )
    invcov = np.linalg.inv(full_skew.get("cov")[np.ix_(indices, indices)])
    full_skew.setdefault("modes", {}).setdefault(name, {}).update(indices=indices, invcov=invcov)
    invcov = np.linalg.inv(analytic.get("cov")[np.ix_(indices, indices)])
    analytic.setdefault("modes", {}).setdefault(name, {}).update(indices=indices, invcov=invcov)

theory_vec = covariance.read_x_ar_theory_vec(
    bestfit_dir, mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)
for i in (pbar := tqdm(range(1000))):
    pbar.set_description(f"Simulation n°{i}")
    data_vec = covariance.read_x_ar_spectra_vec(
        sim_spec_dir,
        spec_name_list,
        f"cross_{i:05d}",
        spectra_order=spectra_order,
        type=d["type"],
    )
    for mode in full_skew.get("modes").keys():
        for v in [full_skew.get("modes")[mode], analytic.get("modes")[mode]]:
            invcov, indices = v.get("invcov"), v.get("indices")
            res = data_vec[indices] - theory_vec[indices]
            chi2 = res @ invcov @ res
            v.setdefault("chi2", []).append(chi2)
  0%|          | 0/1000 [00:00<?, ?it/s]
chi2 = {k: v.get("chi2") for k, v in full_skew.get("modes").items()}
data = pd.melt(pd.DataFrame(chi2))

g = sns.displot(
    data=data,
    x="value",
    col="variable",
    kde=False,
    legend=False,
    stat="density",
    common_norm=False,
    col_wrap=3,
    facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")

for ax in g.axes.flatten():
    mode = ax.title.get_text()
    ndof = len(full_skew["modes"][mode]["indices"])
    ax.axvline(ndof, color="gray", ls="dashed")

    xlim = (0.7 * ndof, 1.3 * ndof)
    x = np.linspace(*xlim, 300)
    y_pdf = stats.chi2.pdf(x, ndof)
    ax.plot(x, y_pdf, color="gray")
    ax.set_xlim(xlim)
No description has been provided for this image

Analytic cov. by Zach & Zack¶

cov = np.load(
    "/global/cfs/cdirs/act/data/zatkins/data/simonsobs/PSpipe/project/ana_cov_comp/cov_dr6_v4_20231128/covariances/x_ar_analytic_cov.npy"
)
zz_cases = {"zz": dict(cov=cov.copy())}

# np.fill_diagonal(cov, np.load(os.path.join(cov_dir, "x_ar_mc_cov.npy")).diagonal())
# zz_cases["mc diagonal"] = dict(cov=cov)

spectra_order = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
combinations = [spectra_order, ["TT", "TE", "ET", "EE"]] + spectra_order
for c in combinations:
    indices = covariance.get_indices(
        bin_low,
        bin_high,
        spec_name_list,
        spectra_cuts=spectra_cuts,
        spectra_order=spectra_order,
        selected_spectra=c,
    )
    for case in zz_cases.values():
        invcov = np.linalg.inv(case.get("cov")[np.ix_(indices, indices)])
        case.setdefault("modes", {}).setdefault(
            ",".join(c) if isinstance(c, list) else c, {}
        ).update(indices=indices, invcov=invcov)
theory_vec = covariance.read_x_ar_theory_vec(
    "./best_fits", mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)
for i in (pbar := tqdm(range(1000))):
    pbar.set_description(f"Simulation n°{i}")
    data_vec = covariance.read_x_ar_spectra_vec(
        "/global/cfs/cdirs/act/data/tlouis/dr6v4/sim_spectra",
        spec_name_list,
        f"cross_{i:05d}",
        spectra_order=spectra_order,
        type=d["type"],
    )
    for case in zz_cases.values():
        for k, v in case.get("modes").items():
            invcov, indices = v.get("invcov"), v.get("indices")
            res = data_vec[indices] - theory_vec[indices]
            chi2 = res @ invcov @ res
            v.setdefault("chi2", []).append(chi2)
  0%|          | 0/1000 [00:00<?, ?it/s]
g = sns.displot(
    data=pd.concat(
        [
            pd.melt(pd.DataFrame({k: v.get("chi2") for k, v in case.get("modes").items()})).assign(
                kind=name
            )
            for name, case in zz_cases.items()
        ]
    ).reset_index(),
    x="value",
    col="variable",
    kde=False,
    hue="kind",
    stat="density",
    common_norm=False,
    col_wrap=3,
    facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")

for ax in g.axes.flatten():
    mode = ax.title.get_text()
    ndof = len(zz_cases["zz"]["modes"][mode]["indices"])
    ax.axvline(ndof, color="gray", ls="dashed")

    xlim = (0.7 * ndof, 1.3 * ndof)
    x = np.linspace(*xlim, 300)
    y_pdf = stats.chi2.pdf(x, ndof)
    ax.plot(x, y_pdf, color="gray")
    ax.set_xlim(xlim)
No description has been provided for this image

Summary plot¶

dfs = [
    pd.melt(pd.DataFrame({k: v.get("chi2") for k, v in case.get("modes").items()})).assign(
        kind=kind
    )
    for kind, case in {
        "skew": full_skew,
        "analytic": analytic,
        "zz": zz_cases["zz"],
    }.items()
]

g = sns.displot(
    data=pd.concat(dfs).reset_index(),
    x="value",
    col="variable",
    kde=False,
    hue="kind",
    stat="density",
    common_norm=False,
    col_wrap=4,
    facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")

for ax in g.axes.flatten():
    mode = ax.title.get_text()
    ndof = len(zz_cases["zz"]["modes"][mode]["indices"])
    ax.axvline(ndof, color="gray", ls="dashed")

    xlim = (0.7 * ndof, 1.3 * ndof)
    x = np.linspace(*xlim, 300)
    y_pdf = stats.chi2.pdf(x, ndof)
    ax.plot(x, y_pdf, color="gray")
    ax.set_xlim(xlim)
No description has been provided for this image
 
last update 2024-10-03 - 11:03:13 PM PDT