import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pspipe_utils import covariance, pspipe_list
from pspy import pspy_utils, so_cov, so_dict
from scipy import stats
from tqdm.auto import tqdm
sns.set_theme(style="ticks", rc={f"axes.spines.{spine}": False for spine in ["top", "right"]})
class Bunch:
def __init__(self, **kwds):
self.update(**kwds)
def update(self, **kwds):
self.__dict__.update(kwds)
def setdefault(self, key, default=None):
return self.__dict__.setdefault(key, default)
def __repr__(self):
return self.__dict__.__repr__()
def __str__(self):
return self.__dict__.__str__()
def get(self, key):
return self.__dict__.get(key)
Loading covariances¶
product_dir = "/pscratch/sd/x/xgarrido/analysis/act/products/dr6/mnms"
get_path = lambda name: os.path.join(product_dir, name)
cov_dir = get_path("covariances")
sim_spec_dir = get_path("sim_spectra")
mcm_dir = get_path("mcms")
bestfit_dir = get_path("best_fits")
get_covariance = lambda name: np.load(os.path.join(cov_dir, f"x_ar_{name}_cov_TT_TE_ET_EE.npy"))
cases = {
"analytic": Bunch(cov=get_covariance("analytic")),
"mc": Bunch(cov=get_covariance("mc")),
}
cases["skew"] = Bunch(
cov=so_cov.corr2cov(np.load("skew_corr/skew_corr_clean.npy"), cases["mc"].cov.diagonal())
)
cov_copy = cases["analytic"].cov.copy()
np.fill_diagonal(cov_copy, cases["mc"].cov.diagonal())
cases["mc_analytic"] = Bunch(cov=cov_copy)
for size in range(200, 1500, 200):
cases[f"skew size {size}"] = Bunch(
cov=so_cov.corr2cov(
np.load(f"skew_corr/skew_corr_clean_size_{size}.npy"),
np.load(f"skew_corr/x_ar_mc_cov_size_{size}.npy").diagonal(),
)
)
for size in range(200, 900, 200):
nkeeps = list(range(5, 35, 5)) + list(range(40, 60, 10))
for nkeep in nkeeps:
cases[f"size {size} - nmode {nkeep}"] = Bunch(
cov=so_cov.corr2cov(
np.load(f"skew_corr/skew_corr_clean_size_{size}_nkeep_{nkeep}.npy"),
np.load(f"skew_corr/x_ar_mc_cov_size_{size}.npy").diagonal(),
)
)
Let's check the diagonals of the different skew matrices
np.abs(cases["mc"].cov.diagonal() - cases["skew"].cov.diagonal()).max()
Out[4]:
9.313225746154785e-10
for k in range(1, 4):
fig, ax = plt.subplots()
for name, color in {"mc": "0.6", "analytic": "k", "skew": "r"}.items():
ax.plot(np.diag(so_cov.cov2corr(cases[name].cov), k=k), label=name, color=color)
ax.set(xlim=[0, 300], ylim=[-0.1, 0.05])
ax.legend(title=f"{k}-diagonal");
Computing $\chi^2$ distributions¶
We use the multipole cuts used in the analysis of the spectra
d = so_dict.so_dict()
d.read_from_file(get_path("global_dr6_v4_updated.dict"))
binning_file = d["binning_file"]
lmax = d["lmax"]
spec_name_list = pspipe_list.get_spec_name_list(d, delimiter="_")
spectra_order = ["TT", "TE", "ET", "EE"]
spectra_cuts = {
"dr6_pa4_f220": dict(T=[1000, lmax], P=[lmax, lmax]),
"dr6_pa5_f150": dict(T=[800, lmax], P=[500, lmax]),
"dr6_pa6_f150": dict(T=[600, lmax], P=[500, lmax]),
"dr6_pa5_f090": dict(T=[1000, lmax], P=[500, lmax]),
"dr6_pa6_f090": dict(T=[1000, lmax], P=[500, lmax]),
}
theory_vec = covariance.read_x_ar_theory_vec(
bestfit_dir, mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)
bin_low, bin_high, *_ = pspy_utils.read_binning_file(binning_file, lmax)
indices = covariance.get_indices(
bin_low,
bin_high,
spec_name_list,
spectra_cuts=spectra_cuts,
spectra_order=spectra_order,
)
for case, bunch in cases.items():
bunch.update(invcov=np.linalg.inv(bunch.cov[np.ix_(indices, indices)]))
for i in (pbar := tqdm(range(1000))):
pbar.set_description(f"Simulation n°{i}")
data_vec = covariance.read_x_ar_spectra_vec(
sim_spec_dir,
spec_name_list,
f"cross_{i:05d}",
spectra_order=spectra_order,
type=d["type"],
)
for case, bunch in cases.items():
res = data_vec[indices] - theory_vec[indices]
chi2 = res @ bunch.invcov @ res
bunch.setdefault("chi2", []).append(chi2)
0%| | 0/1000 [00:00<?, ?it/s]
ndof = len(indices)
chi2 = {
k: v.chi2 for k, v in cases.items() if k in ["analytic", "mc_analytic", "skew", "skew full"]
}
fig, ax = plt.subplots()
sns.histplot(data=chi2, fill=True, kde=False, ax=ax, stat="density", common_norm=False)
ax.axvline(ndof, color="gray", ls="dashed")
ax.set(xlabel=r"$\chi^2$")
x = np.linspace(0.8 * ndof, 1.2 * ndof, 300)
y_pdf = stats.chi2.pdf(x, ndof)
ax.plot(x, y_pdf, color="gray");
print(ndof)
pd.DataFrame(chi2)
1694
Out[8]:
| analytic | skew | mc_analytic | |
|---|---|---|---|
| 0 | 2016.016694 | 1753.555221 | 1613.921295 |
| 1 | 1885.027846 | 1640.620832 | 1529.404014 |
| 2 | 2011.884156 | 1740.485435 | 1608.935671 |
| 3 | 1989.356982 | 1742.101094 | 1588.645976 |
| 4 | 2006.735719 | 1734.637519 | 1592.820116 |
| ... | ... | ... | ... |
| 995 | 1981.959209 | 1722.986240 | 1593.821487 |
| 996 | 1942.864699 | 1690.518606 | 1551.150381 |
| 997 | 1863.957996 | 1624.116667 | 1510.290705 |
| 998 | 2065.007504 | 1797.917773 | 1634.570015 |
| 999 | 1928.715904 | 1690.392101 | 1558.035023 |
1000 rows × 3 columns
$\chi^2$ distributions for a reduced set of MC simulations¶
Now we check the $\chi^2$ distributions given the amount of MC simulation generated
chi2 = {k: v.chi2 for k, v in cases.items() if "skew size" in k}
fig, ax = plt.subplots()
sns.kdeplot(data=chi2, ax=ax, common_norm=True)
ax.axvline(ndof, color="gray")
ax.set(xlabel=r"$\chi^2$");
We can also have a look to the eigenvalues
fig, ax = plt.subplots()
table = []
for name, bunch in cases.items():
if "skew size" not in name:
continue
eigenvalues, eigenvectors = np.linalg.eig(bunch.cov)
ax.semilogy(eigenvalues)
table += [(name.split("size")[-1], np.min(eigenvalues), np.max(eigenvalues))]
pd.DataFrame(table, columns=["nsim", "min λ", "max λ"])
Out[11]:
| nsim | min λ | max λ | |
|---|---|---|---|
| 0 | 200 | -36.117845 | 6.414579e+06 |
| 1 | 400 | -4.096206 | 6.350186e+06 |
| 2 | 600 | -0.804928 | 6.281688e+06 |
| 3 | 800 | -0.142240 | 6.202395e+06 |
| 4 | 1000 | -0.002993 | 6.098695e+06 |
| 5 | 1200 | 0.002411 | 6.192878e+06 |
| 6 | 1400 | 0.002592 | 6.072928e+06 |
Let's also play with the number of selected modes. The idea is can we recover a correct $\chi^2$ distributions for a limited set of MC simulations by adjusting the number of mode when doing the SVD reduction.
for size in range(200, 900, 200):
keys = [f"size {size} - nmode {nmode}" for nmode in nkeeps]
chi2 = {k: v.chi2 for k, v in cases.items() if k in keys}
fig, ax = plt.subplots()
sns.kdeplot(data=cases["skew"].chi2, ax=ax, common_norm=False, color="black")
sns.kdeplot(data=chi2, ax=ax, common_norm=False)
ax.axvline(ndof, color="gray", ls="dashed")
# ax.plot(x, y_pdf, color="gray")
ax.set(xlabel=r"$\chi^2$");
Skewing the full matrix with T, E, B mode¶
analytic = dict(cov=np.load(os.path.join(cov_dir, "x_ar_analytic_cov.npy")))
full_skew = dict(
cov=so_cov.corr2cov(
np.load("skew_corr/skew_corr_clean_full.npy"),
np.load(os.path.join(cov_dir, "x_ar_mc_cov.npy")).diagonal(),
)
)
spectra_order = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
combinations = [spectra_order, ["TT", "TE", "ET", "EE"]] + spectra_order
for c in combinations:
name = ",".join(c) if isinstance(c, list) else c
indices = covariance.get_indices(
bin_low,
bin_high,
spec_name_list,
spectra_cuts=spectra_cuts,
spectra_order=spectra_order,
selected_spectra=c,
)
invcov = np.linalg.inv(full_skew.get("cov")[np.ix_(indices, indices)])
full_skew.setdefault("modes", {}).setdefault(name, {}).update(indices=indices, invcov=invcov)
invcov = np.linalg.inv(analytic.get("cov")[np.ix_(indices, indices)])
analytic.setdefault("modes", {}).setdefault(name, {}).update(indices=indices, invcov=invcov)
theory_vec = covariance.read_x_ar_theory_vec(
bestfit_dir, mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)
for i in (pbar := tqdm(range(1000))):
pbar.set_description(f"Simulation n°{i}")
data_vec = covariance.read_x_ar_spectra_vec(
sim_spec_dir,
spec_name_list,
f"cross_{i:05d}",
spectra_order=spectra_order,
type=d["type"],
)
for mode in full_skew.get("modes").keys():
for v in [full_skew.get("modes")[mode], analytic.get("modes")[mode]]:
invcov, indices = v.get("invcov"), v.get("indices")
res = data_vec[indices] - theory_vec[indices]
chi2 = res @ invcov @ res
v.setdefault("chi2", []).append(chi2)
0%| | 0/1000 [00:00<?, ?it/s]
chi2 = {k: v.get("chi2") for k, v in full_skew.get("modes").items()}
data = pd.melt(pd.DataFrame(chi2))
g = sns.displot(
data=data,
x="value",
col="variable",
kde=False,
legend=False,
stat="density",
common_norm=False,
col_wrap=3,
facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")
for ax in g.axes.flatten():
mode = ax.title.get_text()
ndof = len(full_skew["modes"][mode]["indices"])
ax.axvline(ndof, color="gray", ls="dashed")
xlim = (0.7 * ndof, 1.3 * ndof)
x = np.linspace(*xlim, 300)
y_pdf = stats.chi2.pdf(x, ndof)
ax.plot(x, y_pdf, color="gray")
ax.set_xlim(xlim)
Analytic cov. by Zach & Zack¶
cov = np.load(
"/global/cfs/cdirs/act/data/zatkins/data/simonsobs/PSpipe/project/ana_cov_comp/cov_dr6_v4_20231128/covariances/x_ar_analytic_cov.npy"
)
zz_cases = {"zz": dict(cov=cov.copy())}
# np.fill_diagonal(cov, np.load(os.path.join(cov_dir, "x_ar_mc_cov.npy")).diagonal())
# zz_cases["mc diagonal"] = dict(cov=cov)
spectra_order = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
combinations = [spectra_order, ["TT", "TE", "ET", "EE"]] + spectra_order
for c in combinations:
indices = covariance.get_indices(
bin_low,
bin_high,
spec_name_list,
spectra_cuts=spectra_cuts,
spectra_order=spectra_order,
selected_spectra=c,
)
for case in zz_cases.values():
invcov = np.linalg.inv(case.get("cov")[np.ix_(indices, indices)])
case.setdefault("modes", {}).setdefault(
",".join(c) if isinstance(c, list) else c, {}
).update(indices=indices, invcov=invcov)
theory_vec = covariance.read_x_ar_theory_vec(
"./best_fits", mcm_dir, spec_name_list, lmax, spectra_order=spectra_order
)
for i in (pbar := tqdm(range(1000))):
pbar.set_description(f"Simulation n°{i}")
data_vec = covariance.read_x_ar_spectra_vec(
"/global/cfs/cdirs/act/data/tlouis/dr6v4/sim_spectra",
spec_name_list,
f"cross_{i:05d}",
spectra_order=spectra_order,
type=d["type"],
)
for case in zz_cases.values():
for k, v in case.get("modes").items():
invcov, indices = v.get("invcov"), v.get("indices")
res = data_vec[indices] - theory_vec[indices]
chi2 = res @ invcov @ res
v.setdefault("chi2", []).append(chi2)
0%| | 0/1000 [00:00<?, ?it/s]
g = sns.displot(
data=pd.concat(
[
pd.melt(pd.DataFrame({k: v.get("chi2") for k, v in case.get("modes").items()})).assign(
kind=name
)
for name, case in zz_cases.items()
]
).reset_index(),
x="value",
col="variable",
kde=False,
hue="kind",
stat="density",
common_norm=False,
col_wrap=3,
facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")
for ax in g.axes.flatten():
mode = ax.title.get_text()
ndof = len(zz_cases["zz"]["modes"][mode]["indices"])
ax.axvline(ndof, color="gray", ls="dashed")
xlim = (0.7 * ndof, 1.3 * ndof)
x = np.linspace(*xlim, 300)
y_pdf = stats.chi2.pdf(x, ndof)
ax.plot(x, y_pdf, color="gray")
ax.set_xlim(xlim)
Summary plot¶
dfs = [
pd.melt(pd.DataFrame({k: v.get("chi2") for k, v in case.get("modes").items()})).assign(
kind=kind
)
for kind, case in {
"skew": full_skew,
"analytic": analytic,
"zz": zz_cases["zz"],
}.items()
]
g = sns.displot(
data=pd.concat(dfs).reset_index(),
x="value",
col="variable",
kde=False,
hue="kind",
stat="density",
common_norm=False,
col_wrap=4,
facet_kws={"sharey": False, "sharex": False},
)
g.set_axis_labels("$\chi^2$")
g.set_titles("{col_name}")
for ax in g.axes.flatten():
mode = ax.title.get_text()
ndof = len(zz_cases["zz"]["modes"][mode]["indices"])
ax.axvline(ndof, color="gray", ls="dashed")
xlim = (0.7 * ndof, 1.3 * ndof)
x = np.linspace(*xlim, 300)
y_pdf = stats.chi2.pdf(x, ndof)
ax.plot(x, y_pdf, color="gray")
ax.set_xlim(xlim)