Evaluation#
This notebook serves to showcase how the evaluation pipeline is performed. Full script can be found at eval.py
%load_ext autoreload
%autoreload 2
import copy
import numpy as np
import xarray as xr
from causaldynamics.scm import create_scm_graph
from causaldynamics.plot import plot_scm
from causaldynamics.score import score
from causaldynamics.baselines import PCMCIPlus, FPCMCI, VARLiNGAM, DYNOTEARS, NGC_LSTM, TSCI, CUTSPlus, RCD, GIN, GRASP, TCDF
from tqdm import tqdm
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
/burg/glab/users/jn2808/.conda/envs/kausal/lib/python3.10/site-packages/jaxtyping/__init__.py:231: UserWarning: jaxtyping version >=0.2.23 should be used with Equinox version >=0.11.1
warnings.warn(
Complete script to generate the dataset can be found under scripts/generate_<system>.py.
Otherwise, follow our instructions to download and process the pre-generated dataset from https://huggingface.co/datasets/kausable/CausalDynamics
# Once you have the dataset, either generated or downloaded
# Notice that the dataset has `time_series` and `adjacency_matrix` variables.
DATA_DIR = Path("../data/simple/noise=0.00_confounder=False/data") # (you can change this to your own path)
ds = xr.open_dataset(DATA_DIR / "Rossler_N10_T1000.nc")
# Extract timeseries and adjacency matrix as target
timeseries = ds['time_series'].to_numpy().transpose(1, 0, 2) # shape of (N, T, D)
adj_matrix = ds['adjacency_matrix'].to_numpy()
# NOTE: If dataset is from coupled system e.g., "../data/coupled/...", follow these changes to load the data:
## timeseries = ds['time_series'].to_numpy()[..., 0].transpose(1, 0, 2) ## select the first dimension of each multidimensional node
## adj_matrix = ds['adjacency_matrix_summary'].to_numpy() ## `adjacency_matrix_summary` is the true adjacency matrix for coupled system
Evaluation#
Evaluate TCDF
# TCDF
tau_max = 1
## Estimate adjacency matrix
tcdf_adj_matrix = []
for x in tqdm(timeseries):
tcdf_model = TCDF()
tcdf_model.run(X=x)
tcdf_adj_matrix.append(copy.deepcopy(tcdf_model.adj_matrix))
## Compute scores
score(
preds= np.array(tcdf_adj_matrix),
labs= adj_matrix,
name='TCDF'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00, 1.11s/it]
Scoring...
| TCDF | |
|---|---|
| Metric | |
| Joint AUROC | 0.500000 |
| Individual AUROC | 0.500000 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.666667 |
| Individual AUPRC | 0.666667 |
| Null AUPRC | 0.666667 |
| Joint SHD | 60.000000 |
G = create_scm_graph(tcdf_model.adj_matrix)
plot_scm(G);
Evaluate GRASP
# GRASP
tau_max = 1
## Estimate adjacency matrix
grasp_adj_matrix = []
for x in tqdm(timeseries):
grasp_model = GRASP(tau_max=tau_max)
grasp_model.run(X=x)
grasp_adj_matrix.append(copy.deepcopy(grasp_model.adj_matrix))
## Compute scores
score(
preds= np.array(grasp_adj_matrix),
labs= adj_matrix,
name='GRASP'
)
0%| | 0/10 [00:00<?, ?it/s]
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
GRaSP edge count: 2
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
GRaSP completed in: 0.00s
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 250.20it/s]
Scoring...
| GRASP | |
|---|---|
| Metric | |
| Joint AUROC | 0.476667 |
| Individual AUROC | 0.500000 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.684127 |
| Individual AUPRC | 0.690476 |
| Null AUPRC | 0.666667 |
| Joint SHD | 54.000000 |
G = create_scm_graph(grasp_model.adj_matrix)
plot_scm(G);
Evaluate GIN
# GIN
tau_max = 1
## Estimate adjacency matrix
gin_adj_matrix = []
for x in tqdm(timeseries):
gin_model = GIN(tau_max=tau_max)
gin_model.run(X=x)
gin_adj_matrix.append(copy.deepcopy(gin_model.adj_matrix))
## Compute scores
score(
preds= np.array(gin_adj_matrix),
labs= adj_matrix,
name='GIN'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00, 9.71it/s]
Scoring...
| GIN | |
|---|---|
| Metric | |
| Joint AUROC | 0.500000 |
| Individual AUROC | 0.500000 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.666667 |
| Individual AUPRC | 0.666667 |
| Null AUPRC | 0.666667 |
| Joint SHD | 60.000000 |
G = create_scm_graph(gin_model.adj_matrix)
plot_scm(G);
Evaluate RCD
# RCD
tau_max = 1
## Estimate adjacency matrix
rcd_adj_matrix = []
for x in tqdm(timeseries):
rcd_model = RCD(tau_max=tau_max)
rcd_model.run(X=x)
rcd_adj_matrix.append(copy.deepcopy(rcd_model.adj_matrix))
## Compute scores
score(
preds= np.array(rcd_adj_matrix),
labs= adj_matrix,
name='RCD'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00, 4.44it/s]
Scoring...
| RCD | |
|---|---|
| Metric | |
| Joint AUROC | 0.500000 |
| Individual AUROC | 0.500000 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.666667 |
| Individual AUPRC | 0.666667 |
| Null AUPRC | 0.666667 |
| Joint SHD | 60.000000 |
G = create_scm_graph(rcd_model.adj_matrix)
plot_scm(G);
Evaluate CUTS+
# CUTS+
tau_max = 1
corr_thres = 0.8
## Estimate adjacency matrix
cuts_adj_matrix = []
for x in tqdm(timeseries):
cuts_model = CUTSPlus(tau_max=tau_max, corr_thres=corr_thres)
cuts_model.run(X=x)
cuts_adj_matrix.append(copy.deepcopy(cuts_model.adj_matrix))
## Compute scores
score(
preds= np.array(cuts_adj_matrix),
labs= adj_matrix,
name='CUTS+'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:33<00:00, 3.34s/it]
Scoring...
| CUTS+ | |
|---|---|
| Metric | |
| Joint AUROC | 0.500000 |
| Individual AUROC | 0.500000 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.666667 |
| Individual AUPRC | 0.666667 |
| Null AUPRC | 0.666667 |
| Joint SHD | 50.000000 |
G = create_scm_graph(cuts_model.adj_matrix)
plot_scm(G);
Evaluate PCMCI+
# PCMCI+
tau_max = 1
pc_alpha = 0.01
## Estimate adjacency matrix
pcmci_plus_adj_matrix = []
for x in tqdm(timeseries):
pcmciplus_model = PCMCIPlus(tau_max=tau_max, pc_alpha=pc_alpha)
pcmciplus_model.run(X=x)
pcmci_plus_adj_matrix.append(copy.deepcopy(pcmciplus_model.adj_matrix))
## Compute scores
score(
preds= np.array(pcmci_plus_adj_matrix),
labs= adj_matrix,
name='PCMCI+'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 18.66it/s]
Scoring...
| PCMCI+ | |
|---|---|
| Metric | |
| Joint AUROC | 0.666667 |
| Individual AUROC | 0.666667 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.750000 |
| Individual AUPRC | 0.750000 |
| Null AUPRC | 0.666667 |
| Joint SHD | 20.000000 |
G = create_scm_graph(pcmciplus_model.adj_matrix)
plot_scm(G);
Evaluate VARLiNGAM
# VARLiNGAM
tau_max = 1
## Estimate adjacency matrix
varlingam_adj_matrix = []
for x in tqdm(timeseries):
varlingam_model = VARLiNGAM(tau_max=tau_max)
varlingam_model.run(X=x)
varlingam_adj_matrix.append(copy.deepcopy(varlingam_model.adj_matrix))
## Compute scores
score(
preds= np.array(varlingam_adj_matrix),
labs= adj_matrix,
name='VARLiNGAM'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 23.45it/s]
Scoring...
| VARLiNGAM | |
|---|---|
| Metric | |
| Joint AUROC | 0.658333 |
| Individual AUROC | 0.658333 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.746195 |
| Individual AUPRC | 0.755258 |
| Null AUPRC | 0.666667 |
| Joint SHD | 23.000000 |
G = create_scm_graph(varlingam_model.adj_matrix)
plot_scm(G);
Evaluate DYNOTEARS
# DYNOTEARS
tau_max = 1
## Estimate adjacency matrix
dynotears_adj_matrix = []
for x in tqdm(timeseries):
dynotears_model = DYNOTEARS(tau_max=tau_max)
dynotears_model.run(X=x)
dynotears_adj_matrix.append(copy.deepcopy(dynotears_model.adj_matrix))
## Compute scores
score(
preds= np.array(dynotears_adj_matrix),
labs= adj_matrix,
name='DYNOTEARS'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 90.10it/s]
Scoring...
| DYNOTEARS | |
|---|---|
| Metric | |
| Joint AUROC | 0.733333 |
| Individual AUROC | 0.733333 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.789474 |
| Individual AUPRC | 0.792857 |
| Null AUPRC | 0.666667 |
| Joint SHD | 16.000000 |
G = create_scm_graph(dynotears_model.adj_matrix)
plot_scm(G);
Evaluate NeuralGC (cLSTM)
# cLSTM
tau_max = 1
## Estimate adjacency matrix
clstm_adj_matrix = []
for x in tqdm(timeseries):
clstm_model = NGC_LSTM(tau_max=tau_max)
clstm_model.run(X=x)
clstm_adj_matrix.append(copy.deepcopy(clstm_model.adj_matrix))
## Compute scores
score(
preds= np.array(clstm_adj_matrix),
labs= adj_matrix,
name='cLSTM'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00, 1.19s/it]
Scoring...
| cLSTM | |
|---|---|
| Metric | |
| Joint AUROC | 0.500000 |
| Individual AUROC | 0.500000 |
| Null AUROC | 0.500000 |
| Joint AUPRC | 0.666667 |
| Individual AUPRC | 0.666667 |
| Null AUPRC | 0.666667 |
| Joint SHD | 30.000000 |
G = create_scm_graph(clstm_model.adj_matrix)
plot_scm(G);