Evaluation

Evaluation#

This notebook serves to showcase how the evaluation pipeline is performed. Full script can be found at eval.py

%load_ext autoreload
%autoreload 2
import copy
import numpy as np
import xarray as xr

from causaldynamics.scm import create_scm_graph
from causaldynamics.plot import plot_scm
from causaldynamics.score import score
from causaldynamics.baselines import PCMCIPlus, FPCMCI, VARLiNGAM, DYNOTEARS, NGC_LSTM, TSCI, CUTSPlus, RCD, GIN, GRASP, TCDF

from tqdm import tqdm
from pathlib import Path

import warnings
warnings.filterwarnings('ignore')
/burg/glab/users/jn2808/.conda/envs/kausal/lib/python3.10/site-packages/jaxtyping/__init__.py:231: UserWarning: jaxtyping version >=0.2.23 should be used with Equinox version >=0.11.1
  warnings.warn(

Complete script to generate the dataset can be found under scripts/generate_<system>.py.

Otherwise, follow our instructions to download and process the pre-generated dataset from https://huggingface.co/datasets/kausable/CausalDynamics

# Once you have the dataset, either generated or downloaded
# Notice that the dataset has `time_series` and `adjacency_matrix` variables.
DATA_DIR = Path("../data/simple/noise=0.00_confounder=False/data")  # (you can change this to your own path)
ds = xr.open_dataset(DATA_DIR / "Rossler_N10_T1000.nc")

# Extract timeseries and adjacency matrix as target
timeseries = ds['time_series'].to_numpy().transpose(1, 0, 2) # shape of (N, T, D)
adj_matrix = ds['adjacency_matrix'].to_numpy()

# NOTE: If dataset is from coupled system e.g., "../data/coupled/...", follow these changes to load the data:
## timeseries = ds['time_series'].to_numpy()[..., 0].transpose(1, 0, 2) ## select the first dimension of each multidimensional node
## adj_matrix = ds['adjacency_matrix_summary'].to_numpy() ## `adjacency_matrix_summary` is the true adjacency matrix for coupled system

Evaluation#

Evaluate TCDF

# TCDF
tau_max = 1

## Estimate adjacency matrix
tcdf_adj_matrix = []
for x in tqdm(timeseries):
    tcdf_model = TCDF()
    tcdf_model.run(X=x)
    tcdf_adj_matrix.append(copy.deepcopy(tcdf_model.adj_matrix))

## Compute scores
score(
    preds= np.array(tcdf_adj_matrix),
    labs= adj_matrix,
    name='TCDF'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.11s/it]
Scoring...

TCDF
Metric
Joint AUROC 0.500000
Individual AUROC 0.500000
Null AUROC 0.500000
Joint AUPRC 0.666667
Individual AUPRC 0.666667
Null AUPRC 0.666667
Joint SHD 60.000000
G = create_scm_graph(tcdf_model.adj_matrix)
plot_scm(G);
../_images/ec2e21c0501e63106df7fc814d5097c368717909e22ca9e9d96a0d2bfdec824f.png

Evaluate GRASP

# GRASP
tau_max = 1

## Estimate adjacency matrix
grasp_adj_matrix = []
for x in tqdm(timeseries):
    grasp_model = GRASP(tau_max=tau_max)
    grasp_model.run(X=x)
    grasp_adj_matrix.append(copy.deepcopy(grasp_model.adj_matrix))

## Compute scores
score(
    preds= np.array(grasp_adj_matrix),
    labs= adj_matrix,
    name='GRASP'
)
  0%|                                                                                                          | 0/10 [00:00<?, ?it/s]
GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 
GRaSP edge count: 2    
GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 

GRaSP completed in: 0.00s 
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 250.20it/s]
Scoring...

GRASP
Metric
Joint AUROC 0.476667
Individual AUROC 0.500000
Null AUROC 0.500000
Joint AUPRC 0.684127
Individual AUPRC 0.690476
Null AUPRC 0.666667
Joint SHD 54.000000
G = create_scm_graph(grasp_model.adj_matrix)
plot_scm(G);
../_images/4e363d520e58f0ab4b70a2ab60d90f6cf920659336e4c005c62db1c474ddc6a4.png

Evaluate GIN

# GIN
tau_max = 1

## Estimate adjacency matrix
gin_adj_matrix = []
for x in tqdm(timeseries):
    gin_model = GIN(tau_max=tau_max)
    gin_model.run(X=x)
    gin_adj_matrix.append(copy.deepcopy(gin_model.adj_matrix))

## Compute scores
score(
    preds= np.array(gin_adj_matrix),
    labs= adj_matrix,
    name='GIN'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  9.71it/s]
Scoring...

GIN
Metric
Joint AUROC 0.500000
Individual AUROC 0.500000
Null AUROC 0.500000
Joint AUPRC 0.666667
Individual AUPRC 0.666667
Null AUPRC 0.666667
Joint SHD 60.000000
G = create_scm_graph(gin_model.adj_matrix)
plot_scm(G);
../_images/5b3f778368fa366ff2b7fb7717b6d187f384e09dcbc638f2de3fcb177d92cf0e.png

Evaluate RCD

# RCD
tau_max = 1

## Estimate adjacency matrix
rcd_adj_matrix = []
for x in tqdm(timeseries):
    rcd_model = RCD(tau_max=tau_max)
    rcd_model.run(X=x)
    rcd_adj_matrix.append(copy.deepcopy(rcd_model.adj_matrix))

## Compute scores
score(
    preds= np.array(rcd_adj_matrix),
    labs= adj_matrix,
    name='RCD'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.44it/s]
Scoring...

RCD
Metric
Joint AUROC 0.500000
Individual AUROC 0.500000
Null AUROC 0.500000
Joint AUPRC 0.666667
Individual AUPRC 0.666667
Null AUPRC 0.666667
Joint SHD 60.000000
G = create_scm_graph(rcd_model.adj_matrix)
plot_scm(G);
../_images/6f32f513598e0fc6ed8df8d48540854b9a31c5e2eefb8a841a4bf0fbc0d765ed.png

Evaluate CUTS+

# CUTS+
tau_max = 1
corr_thres = 0.8

## Estimate adjacency matrix
cuts_adj_matrix = []
for x in tqdm(timeseries):
    cuts_model = CUTSPlus(tau_max=tau_max, corr_thres=corr_thres)
    cuts_model.run(X=x)
    cuts_adj_matrix.append(copy.deepcopy(cuts_model.adj_matrix))

## Compute scores
score(
    preds= np.array(cuts_adj_matrix),
    labs= adj_matrix,
    name='CUTS+'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:33<00:00,  3.34s/it]
Scoring...

CUTS+
Metric
Joint AUROC 0.500000
Individual AUROC 0.500000
Null AUROC 0.500000
Joint AUPRC 0.666667
Individual AUPRC 0.666667
Null AUPRC 0.666667
Joint SHD 50.000000
G = create_scm_graph(cuts_model.adj_matrix)
plot_scm(G);
../_images/2782ab59a8871b313dd3280c1c0460458434ddf74794c2708e40bfddb5d98ff2.png

Evaluate PCMCI+

# PCMCI+
tau_max = 1
pc_alpha = 0.01

## Estimate adjacency matrix
pcmci_plus_adj_matrix = []
for x in tqdm(timeseries):
    pcmciplus_model = PCMCIPlus(tau_max=tau_max, pc_alpha=pc_alpha)
    pcmciplus_model.run(X=x)
    pcmci_plus_adj_matrix.append(copy.deepcopy(pcmciplus_model.adj_matrix))

## Compute scores
score(
    preds= np.array(pcmci_plus_adj_matrix),
    labs= adj_matrix,
    name='PCMCI+'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 18.66it/s]
Scoring...

PCMCI+
Metric
Joint AUROC 0.666667
Individual AUROC 0.666667
Null AUROC 0.500000
Joint AUPRC 0.750000
Individual AUPRC 0.750000
Null AUPRC 0.666667
Joint SHD 20.000000
G = create_scm_graph(pcmciplus_model.adj_matrix)
plot_scm(G);
../_images/cf40309ec8f44a97393b4dc11510c7262518409d57c79c2d70776e6ee15270fa.png

Evaluate VARLiNGAM

# VARLiNGAM
tau_max = 1

## Estimate adjacency matrix
varlingam_adj_matrix = []
for x in tqdm(timeseries):
    varlingam_model = VARLiNGAM(tau_max=tau_max)
    varlingam_model.run(X=x)
    varlingam_adj_matrix.append(copy.deepcopy(varlingam_model.adj_matrix))

## Compute scores
score(
    preds= np.array(varlingam_adj_matrix),
    labs= adj_matrix,
    name='VARLiNGAM'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 23.45it/s]
Scoring...

VARLiNGAM
Metric
Joint AUROC 0.658333
Individual AUROC 0.658333
Null AUROC 0.500000
Joint AUPRC 0.746195
Individual AUPRC 0.755258
Null AUPRC 0.666667
Joint SHD 23.000000
G = create_scm_graph(varlingam_model.adj_matrix)
plot_scm(G);
../_images/65726b71132e882abfe3dc555d738d71eb7412a7ae41f2c26a1111d953e198bc.png

Evaluate DYNOTEARS

# DYNOTEARS
tau_max = 1

## Estimate adjacency matrix
dynotears_adj_matrix = []
for x in tqdm(timeseries):
    dynotears_model = DYNOTEARS(tau_max=tau_max)
    dynotears_model.run(X=x)
    dynotears_adj_matrix.append(copy.deepcopy(dynotears_model.adj_matrix))

## Compute scores
score(
    preds= np.array(dynotears_adj_matrix),
    labs= adj_matrix,
    name='DYNOTEARS'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 90.10it/s]
Scoring...

DYNOTEARS
Metric
Joint AUROC 0.733333
Individual AUROC 0.733333
Null AUROC 0.500000
Joint AUPRC 0.789474
Individual AUPRC 0.792857
Null AUPRC 0.666667
Joint SHD 16.000000
G = create_scm_graph(dynotears_model.adj_matrix)
plot_scm(G);
../_images/e1097cd92a430f6127f089db12fb82de6de2d1c68e72236f0dfe1a7066aecc64.png

Evaluate NeuralGC (cLSTM)

# cLSTM
tau_max = 1

## Estimate adjacency matrix
clstm_adj_matrix = []
for x in tqdm(timeseries):
    clstm_model = NGC_LSTM(tau_max=tau_max)
    clstm_model.run(X=x)
    clstm_adj_matrix.append(copy.deepcopy(clstm_model.adj_matrix))

## Compute scores
score(
    preds= np.array(clstm_adj_matrix),
    labs= adj_matrix,
    name='cLSTM'
)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.19s/it]
Scoring...

cLSTM
Metric
Joint AUROC 0.500000
Individual AUROC 0.500000
Null AUROC 0.500000
Joint AUPRC 0.666667
Individual AUPRC 0.666667
Null AUPRC 0.666667
Joint SHD 30.000000
G = create_scm_graph(clstm_model.adj_matrix)
plot_scm(G);
../_images/ed53060140ab0b9c625dce45938ece424ed1df25448865a42edda513995dc922.png