IHDP 因果效应估计
DoWhy example on the IHDP (Infant Health and Development Program) dataset
# importing required libraries
import os, sys
import dowhy
from dowhy import CausalModel
import pandas as pd
import numpy as np
# Loading Data
# The CSV has no header row (header=None), so column names are assigned below.
data = pd.read_csv(
    "https://raw.githubusercontent.com/AMLab-Amsterdam/CEVAE/master/datasets/IHDP/csv/ihdp_npci_1.csv",
    header=None,
)
# Five named columns followed by the covariates x1..x25. The covariate names
# must be appended here: the rest of the script refers to them, and assigning
# only five names to `data.columns` would fail on a wider frame.
col = ["treatment", "y_factual", "y_cfactual", "mu0", "mu1"]
col += ["x" + str(i) for i in range(1, 26)]
data.columns = col
# Store the treatment indicator as a boolean column.
data = data.astype({"treatment": 'bool'}, copy=False)
# Create a causal model from the data and given common causes.
# `xs` keeps its original value: "x1+x2+...+x25+" (note the trailing "+").
xs = "".join("x" + str(i) + "+" for i in range(1, 26))
# Splitting on "+" leaves an empty trailing entry because of the final "+";
# drop it so only real column names are passed as common causes.
common_causes = [name for name in xs.split("+") if name]
# Treatment and outcome refer to the column names assigned when the data was
# loaded ("treatment" and "y_factual").
model = CausalModel(
    data=data,
    treatment="treatment",
    outcome="y_factual",
    common_causes=common_causes,
)
#Identify the causal effect
identified_estimand = model.identify_effect()
3. Estimate (using different methods)
3.1 Using Linear Regression
# Estimate the causal effect and compare it with Average Treatment Effect
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    test_significance=True,
)
print("Causal Estimate is " + str(estimate.value))
# Split rows by treatment status; these two frames are reused by the
# comparisons printed after the later estimators.
data_1 = data[data["treatment"] == 1]
data_0 = data[data["treatment"] == 0]
# The value printed as "ATE" is the plain difference between the mean factual
# outcome of treated rows and that of untreated rows.
print("ATE", np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"]))
3.2 Using Propensity Score Matching
# Re-estimate the effect with propensity score matching. The method name
# follows the "backdoor.<estimator>" pattern used by the other estimators in
# this script. This rebinds `estimate`.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
)
print("Causal Estimate is " + str(estimate.value))
# Same difference in mean factual outcomes as printed for linear regression;
# relies on `data_1` / `data_0` created in that earlier step.
print("ATE", np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"]))
3.3 Using Propensity Score Stratification
# Re-estimate the effect with propensity score stratification, passing the
# estimator-specific settings through `method_params`. This rebinds `estimate`.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
    method_params={'num_strata': 50, 'clipping_threshold': 5},
)
print("Causal Estimate is " + str(estimate.value))
# Same difference in mean factual outcomes as printed for linear regression;
# relies on `data_1` / `data_0` created in that earlier step.
print("ATE", np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"]))
3.4 Using Propensity Score Weighting
# Re-estimate the effect with propensity score weighting. The method name
# follows the "backdoor.<estimator>" pattern used by the other estimators in
# this script. This is the last assignment to `estimate`, so it is the
# estimate the refutation steps below operate on.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting",
)
print("Causal Estimate is " + str(estimate.value))
# Same difference in mean factual outcomes as printed for linear regression;
# relies on `data_1` / `data_0` created in that earlier step.
print("ATE", np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"]))
4. Refute
Refute the obtained estimate using multiple robustness checks.
4.1 Adding a random common cause
# Robustness check: run the "random_common_cause" refuter against the most
# recently computed `estimate`.
refute_results = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="random_common_cause",
)
4.2 Using a placebo treatment
# Robustness check: run the "placebo_treatment_refuter" with the "permute"
# placebo type against the most recently computed `estimate`.
res_placebo = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
)
4.3 Data Subset Refuter
# Robustness check: run the "data_subset_refuter" with a subset fraction of
# 0.9 against the most recently computed `estimate`.
res_subset = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="data_subset_refuter",
    subset_fraction=0.9,
)
