Commit 2802a4f2 authored by Fanny

copied code to public repo

# Regularized Adversarial Training
This repository contains code to train and evaluate models against
adversarially chosen rotations and translations. It can be used to reproduce the
main experiments of:
**Invariance-inducing regularization using worst-case transformations suffices to boost accuracy and spatial robustness**<br>
*Fanny Yang, Zuowen Wang and Christina Heinze-Deml*<br>
The code is based on https://github.com/MadryLab/adversarial_spatial.
The main scripts to run are `train.py` and `eval.py`, which train and
evaluate a model, respectively.
**Note:** `train.py` only supports groups of size 2 for now.
Different training options are included in the folder `configs`.
An annotated template is shown below.
```
{
"model": {
"output_dir": "output/cifar10",
"pad_mode": "constant",
"model_family": "resnet",
"resnet_depth_n": 5,
"filters": [16, 16, 32, 64],
"pad_size": 32,
"n_classes": 10,
"use_reg": true #true if we use regularization
},
"training": {
"tf_random_seed": 1,
"np_random_seed": 1,
"max_num_training_steps": 80000,
"num_output_steps": 5000,
"num_summary_steps": 5000,
"num_easyeval_steps": 5000,
"num_eval_steps": 80000,
"num_checkpoint_steps": 5000,
"num_ids": 64, #annotated as b
"batch_size": 128, #can be b, 2b or 3b
"lr" : 0.1,
"step_size_schedule": [[0, 0.1], [40000, 0.01], [60000, 0.001]],
"momentum": 0.9,
"weight_decay": 0.0002,
"eval_during_training": true,
"adversarial_training": true,
"adversarial_ce": false, # set to true if only adversarial examples are used for cross-entropy
"nat_ce": false, # set to true if only original examples are used for cross-entropy
"data_augmentation": true,
"data_augmentation_reg": false,
"group_size": 2,
"lambda_": 1 #the coefficient of the regularizer
},
"eval": {
"num_eval_examples": 10000,
"batch_size": 128,
"adversarial_eval": true
},
#defense mechanism
"defense": {
"reg_type": "kl",
"cce_adv_exp_wrt": "cce", # adversarial examples used for cross-entropy are generated w.r.t
"reg_adv_exp_wrt": "kl", # adversarial examples used for regularizer are generated w.r.t
"use_linf": false,
"use_spatial": true,
"only_rotation": false,
"only_translation": false,
"loss_function": "xent",
"epsilon": 8.0,
"num_steps": 5,
"step_size": 2.0,
"random_start": false,
"spatial_method": "random",
"spatial_limits": [3, 3, 30],
"random_tries": 10,
"grid_granularity": [5, 5, 31]
},
#attack policy (for evaluation)
"attack": {
"use_linf": false,
"use_spatial": true,
"only_rotation": false,
"only_translation": false,
"loss_function": "xent",
"epsilon": 8.0,
"num_steps": 5,
"step_size": 2.0,
"random_start": false,
"spatial_limits": [3, 3, 30],
"random_tries": 10,
"grid_granularity": [5, 5, 31]
},
"data": {
"dataset_name": "cifar-10",
"data_path": "./datasets/cifar10"
}
}
```
Run with a particular config file:
```
python train.py --config PATH/TO/FILE
```
## Standard CIFAR data augmentation
By default, data augmentation only includes random left-right flips. Standard CIFAR-10
augmentation (±4 pixel crops) can be achieved by setting
`adversarial_training: true`, `spatial_method: random`, `random_tries: 1`, and
`spatial_limits: [4, 4, 0]`, as in the fragment below.
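As a sketch, the relevant entries of the config file would be (fragment only; the remaining keys stay as in the template above):
```
"training": {
    "adversarial_training": true
},
"defense": {
    "spatial_method": "random",
    "random_tries": 1,
    "spatial_limits": [4, 4, 0]
}
```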
Run
```
python train.py --config ./configs/std.json
```
for standard training (std) with only translation as data augmentation, and run
```
python train.py --config ./configs/std_star.json
```
for standard training (std*) with both translation and rotation as data augmentation.
## Run with various settings for adversarial training
### Run with unregularized adversarial training
Set ```use_reg = false``` in the configuration file.
See ```configs/at_rob_wo_10.json``` for an example.
### Run with different batch types
The cross-entropy part of the loss can be computed on different batch types.
To use solely original images, set ```nat_ce = true, adversarial_ce = false``` in the configuration file; this corresponds to "nat" in the paper.
Accordingly, ```nat_ce = false, adversarial_ce = true``` corresponds to "rob" (only adversarial examples are used for the cross-entropy), and
```nat_ce = false, adversarial_ce = false``` corresponds to "mix" (both original and adversarial examples are used). The settings are summarized below.
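In short (fragments of the `training` section):
```
"nat_ce": true,  "adversarial_ce": false   # "nat": original examples only
"nat_ce": false, "adversarial_ce": true    # "rob": adversarial examples only
"nat_ce": false, "adversarial_ce": false   # "mix": original and adversarial examples
```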
### Generate adversarial examples w.r.t. different functions
Regardless of the loss function, the adversarial examples used for the cross-entropy and those used
for the regularizer can be generated with respect to different functions.
This is controlled by ```cce_adv_exp_wrt``` and ```reg_adv_exp_wrt```.
For instance, to train in the same way as adversarial logit pairing (ALP) [1],
set both ```cce_adv_exp_wrt``` and ```reg_adv_exp_wrt``` to ```cce```. The adversarial
examples entering the regularizer are then generated w.r.t. the cross-entropy.
**Note:** if ```cce_adv_exp_wrt != reg_adv_exp_wrt``` and a mixed batch is used for the cross-entropy, set
```batch_size == 3 * num_ids```, since two different sets of adversarial examples are needed, one for the
cross-entropy and one for the regularizer.
Please refer to ```configs/l2_mix_wo_10.json``` for an example; a fragment is sketched below.
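As a sketch (assuming `num_ids` of 64 as in the template; the exact values in ```configs/l2_mix_wo_10.json``` may differ), such a mixed-batch setup with different generation functions would contain:
```
"training": {
    "num_ids": 64,
    "batch_size": 192,
    "adversarial_ce": false,
    "nat_ce": false
},
"defense": {
    "cce_adv_exp_wrt": "cce",
    "reg_adv_exp_wrt": "kl"
}
```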
## References
[1] Harini Kannan, Alexey Kurakin, and Ian Goodfellow. Adversarial Logit Pairing. arXiv preprint
arXiv:1803.06373, 2018.
"""
Utilities for importing the CIFAR100 dataset.
Each image in the dataset is a numpy array of shape (32, 32, 3), with the values
being unsigned integers (i.e., in the range 0,1,...,255).
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import pickle
import random
import sys
import tensorflow as tf
version = sys.version_info
import numpy as np
class CIFAR100Data(object):
"""
Unpickles the CIFAR100 dataset from a specified folder containing a pickled
version following the format of Krizhevsky which can be found
[here](https://www.cs.toronto.edu/~kriz/cifar.html).
Inputs to constructor
=====================
- path: path to the pickled dataset. The training data must be pickled
into a single file named train containing 50,000 examples, the test data
must be pickled into a single file called test containing 10,000
examples, and the 100 fine class names must be
pickled into a file called meta. The pickled examples should
be stored as an array of 32x32x3-shaped
arrays together with an array of their true fine labels.
"""
def __init__(self, path):
train_filename = 'train'
eval_filename = 'test'
metadata_filename = 'meta'
train_images, train_labels = self._load_datafile(
os.path.join(path, train_filename))
eval_images, eval_labels = self._load_datafile(
os.path.join(path, eval_filename))
with open(os.path.join(path, metadata_filename), 'rb') as fo:
if version.major == 3:
data_dict = pickle.load(fo, encoding='bytes')
else:
data_dict = pickle.load(fo)
self.label_names = data_dict[b'fine_label_names']
for ii in range(len(self.label_names)):
self.label_names[ii] = self.label_names[ii].decode('utf-8')
self.train_data = Dataset(train_images, train_labels)
self.eval_data = Dataset(eval_images, eval_labels)
@staticmethod
def _load_datafile(filename):
with open(filename, 'rb') as fo:
if version.major == 3:
data_dict = pickle.load(fo, encoding='bytes')
else:
data_dict = pickle.load(fo)
assert data_dict[b'data'].dtype == np.uint8
image_data = data_dict[b'data']
n = image_data.shape[0]
image_data = image_data.reshape((n, 3, 32, 32)).transpose(0,2,3,1)
return image_data, np.array(data_dict[b'fine_labels'])
class AugmentedCIFAR100Data(object):
"""
Data augmentation wrapper over a loaded dataset.
Inputs to constructor
=====================
- raw_cifar100data: the loaded CIFAR100 dataset, via the CIFAR100Data class
- sess: current tensorflow session
"""
def __init__(self, raw_cifar100data, sess):
assert isinstance(raw_cifar100data, CIFAR100Data)
self.image_size = 32
# create augmentation computational graph
self.x_input_placeholder = tf.placeholder(tf.float32,
shape=[None, 32, 32, 3])
# random left-right flips (the only augmentation applied here)
flipped = tf.map_fn(lambda img: tf.image.random_flip_left_right(img),
self.x_input_placeholder)
self.augmented = flipped
self.train_data = AugmentedDataset(raw_cifar100data.train_data, sess,
self.x_input_placeholder,
self.augmented)
self.eval_data = AugmentedDataset(raw_cifar100data.eval_data, sess,
self.x_input_placeholder,
self.augmented,1)
self.label_names = raw_cifar100data.label_names
class Dataset(object):
"""
Dataset object implementing a simple batching procedure.
"""
def __init__(self, xs, ys):
self.xs = xs
self.n = xs.shape[0]
self.ys = ys
self.batch_start = 0
self.cur_order = np.random.permutation(self.n)
def get_next_batch(self, batch_size, multiple_passes=False,
reshuffle_after_pass=True):
epoch_done = False
if self.n < batch_size:
raise ValueError('Batch size can be at most the dataset size')
if not multiple_passes:
actual_batch_size = min(batch_size, self.n - self.batch_start)
if actual_batch_size <= 0:
raise ValueError('Pass through the dataset is complete.')
batch_end = self.batch_start + actual_batch_size
batch_xs = self.xs[self.cur_order[self.batch_start : batch_end],...]
batch_ys = self.ys[self.cur_order[self.batch_start : batch_end],...]
self.batch_start += actual_batch_size
return batch_xs, batch_ys
actual_batch_size = min(batch_size, self.n - self.batch_start)
if actual_batch_size < batch_size:
epoch_done = True
if reshuffle_after_pass:
self.cur_order = np.random.permutation(self.n)
self.batch_start = 0
batch_end = self.batch_start + batch_size
batch_xs = self.xs[self.cur_order[self.batch_start : batch_end], ...]
batch_ys = self.ys[self.cur_order[self.batch_start : batch_end], ...]
self.batch_start += actual_batch_size
return batch_xs, batch_ys, epoch_done
class AugmentedDataset(object):
"""
Dataset object with built-in data augmentation. When performing
adversarial attacks, we cannot include data augmentation as part of the
model. If we did, the adversary would try to backprop through it.
"""
def __init__(self, raw_datasubset, sess, x_input_placeholder,
augmented, to_cache=False):
self.sess = sess
self.raw_datasubset = raw_datasubset
self.x_input_placeholder = x_input_placeholder
self.augmented = augmented
if to_cache:
# Run the augmentation graph once on the raw data and cache the result
self.xs = self.sess.run(self.augmented,
feed_dict={self.x_input_placeholder:
self.raw_datasubset.xs})
self.n = self.xs.shape[0]
self.ys = self.raw_datasubset.ys
else:
self.xs = None
self.n = None
self.ys = None
def get_next_batch(self, batch_size, multiple_passes=False,
reshuffle_after_pass=True):
raw_batch = self.raw_datasubset.get_next_batch(batch_size,
multiple_passes,
reshuffle_after_pass)
epoch_done = raw_batch[2]
#images = raw_batch[0].astype(np.float32)
return (self.sess.run(
self.augmented,
feed_dict={self.x_input_placeholder:
raw_batch[0]}), raw_batch[1], epoch_done)
"""
Utilities for importing the CIFAR10 dataset.
Each image in the dataset is a numpy array of shape (32, 32, 3), with the values
being unsigned integers (i.e., in the range 0,1,...,255).
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import pickle
import random
import sys
import tensorflow as tf
version = sys.version_info
import numpy as np
class CIFAR10Data(object):
"""
Unpickles the CIFAR10 dataset from a specified folder containing a pickled
version following the format of Krizhevsky which can be found
[here](https://www.cs.toronto.edu/~kriz/cifar.html).
Inputs to constructor
=====================
- path: path to the pickled dataset. The training data must be pickled
into five files named data_batch_i for i = 1, ..., 5, containing 10,000
examples each, the test data
must be pickled into a single file called test_batch containing 10,000
examples, and the 10 class names must be
pickled into a file called batches.meta. The pickled examples should
be stored as a tuple of two objects: an array of 10,000 32x32x3-shaped
arrays, and an array of their 10,000 true labels.
"""
def __init__(self, path):
train_filenames = ['data_batch_{}'.format(ii + 1) for ii in range(5)]
eval_filename = 'test_batch'
metadata_filename = 'batches.meta'
train_images = np.zeros((50000, 32, 32, 3), dtype='uint8')
train_labels = np.zeros(50000, dtype='int32')
for ii, fname in enumerate(train_filenames):
cur_images, cur_labels = self._load_datafile(
os.path.join(path, fname))
train_images[ii * 10000 : (ii+1) * 10000, ...] = cur_images
train_labels[ii * 10000 : (ii+1) * 10000, ...] = cur_labels
eval_images, eval_labels = self._load_datafile(
os.path.join(path, eval_filename))
with open(os.path.join(path, metadata_filename), 'rb') as fo:
if version.major == 3:
data_dict = pickle.load(fo, encoding='bytes')
else:
data_dict = pickle.load(fo)
self.label_names = data_dict[b'label_names']
for ii in range(len(self.label_names)):
self.label_names[ii] = self.label_names[ii].decode('utf-8')
self.train_data = Dataset(train_images, train_labels)
self.eval_data = Dataset(eval_images, eval_labels)
@staticmethod
def _load_datafile(filename):
with open(filename, 'rb') as fo:
if version.major == 3:
data_dict = pickle.load(fo, encoding='bytes')
else:
data_dict = pickle.load(fo)
assert data_dict[b'data'].dtype == np.uint8
image_data = data_dict[b'data']
image_data = image_data.reshape((10000, 3, 32, 32)).transpose(0,2,3,1)
return image_data, np.array(data_dict[b'labels'])
class Dataset(object):
"""
Dataset object implementing a simple batching procedure.
"""
def __init__(self, xs, ys):
self.xs = xs
self.n = xs.shape[0]
self.ys = ys
self.batch_start = 0
self.cur_order = np.random.permutation(self.n)
def get_next_batch(self, batch_size, multiple_passes=False,
reshuffle_after_pass=True):
epoch_done = False
if self.n < batch_size:
raise ValueError('Batch size can be at most the dataset size')
if not multiple_passes:
actual_batch_size = min(batch_size, self.n - self.batch_start)
if actual_batch_size <= 0:
raise ValueError('Pass through the dataset is complete.')
batch_end = self.batch_start + actual_batch_size
batch_xs = self.xs[self.cur_order[self.batch_start : batch_end],...]
batch_ys = self.ys[self.cur_order[self.batch_start : batch_end],...]
self.batch_start += actual_batch_size
return batch_xs, batch_ys
actual_batch_size = min(batch_size, self.n - self.batch_start)
if actual_batch_size < batch_size:
epoch_done = True
if reshuffle_after_pass:
self.cur_order = np.random.permutation(self.n)
self.batch_start = 0
batch_end = self.batch_start + batch_size
batch_xs = self.xs[self.cur_order[self.batch_start : batch_end], ...]
batch_ys = self.ys[self.cur_order[self.batch_start : batch_end], ...]
self.batch_start += actual_batch_size
return batch_xs, batch_ys, epoch_done
class AugmentedCIFAR10Data(object):
"""
Data augmentation wrapper over a loaded dataset.
Inputs to constructor
=====================
- raw_cifar10data: the loaded CIFAR10 dataset, via the CIFAR10Data class
- sess: current tensorflow session
"""
def __init__(self, raw_cifar10data, sess):
assert isinstance(raw_cifar10data, CIFAR10Data)
self.image_size = 32
# create augmentation computational graph
self.x_input_placeholder = tf.placeholder(tf.float32,
shape=[None, 32, 32, 3])
# random left-right flips (the only augmentation applied here)
flipped = tf.map_fn(lambda img: tf.image.random_flip_left_right(img),
self.x_input_placeholder)
self.augmented = flipped
self.train_data = AugmentedDataset(raw_cifar10data.train_data, sess,
self.x_input_placeholder,
self.augmented)
self.eval_data = AugmentedDataset(raw_cifar10data.eval_data, sess,
self.x_input_placeholder,
self.augmented, 1)
self.label_names = raw_cifar10data.label_names
class AugmentedDataset(object):
"""
Dataset object with built-in data augmentation. When performing
adversarial attacks, we cannot include data augmentation as part of the
model. If we did, the adversary would try to backprop through it.
"""
def __init__(self, raw_datasubset, sess, x_input_placeholder,
augmented, to_cache=False):
self.sess = sess
self.raw_datasubset = raw_datasubset
self.x_input_placeholder = x_input_placeholder
self.augmented = augmented
if to_cache:
# Run the augmentation graph once on the raw data and cache the result
self.xs = self.sess.run(self.augmented,
feed_dict={self.x_input_placeholder:
self.raw_datasubset.xs})
self.n = self.xs.shape[0]
self.ys = self.raw_datasubset.ys
else:
self.xs = None
self.n = None
self.ys = None
def get_next_batch(self, batch_size, multiple_passes=False,
reshuffle_after_pass=True):
raw_batch = self.raw_datasubset.get_next_batch(batch_size,
multiple_passes,
reshuffle_after_pass)
epoch_done = raw_batch[2]
#images = raw_batch[0].astype(np.float32)
return (self.sess.run(
self.augmented,
feed_dict={self.x_input_placeholder:
raw_batch[0]}), raw_batch[1], epoch_done)
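# Minimal usage sketch (not part of the original file): load the pickled
# CIFAR-10 batches and draw one flip-augmented training batch. Assumes a
# TensorFlow 1.x session and that the data lives under ./datasets/cifar10,
# the path used in the configs.
if __name__ == '__main__':
    raw_cifar = CIFAR10Data('./datasets/cifar10')
    with tf.Session() as sess:
        cifar = AugmentedCIFAR10Data(raw_cifar, sess)
        xs, ys, epoch_done = cifar.train_data.get_next_batch(
            128, multiple_passes=True)
        # xs: (128, 32, 32, 3) float32 images, ys: (128,) integer labels
        print(xs.shape, ys.shape, epoch_done)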
{
"model": {
"output_dir": "output/cifar10",
"pad_mode": "constant",
"model_family": "resnet",
"resnet_depth_n": 5,
"filters": [16, 16, 32, 64],
"pad_size": 32,
"n_classes": 10,
"use_reg": true
},
"training": {
"tf_random_seed": 1,
"np_random_seed": 1,
"max_num_training_steps": 80000,
"num_output_steps": 5000,
"num_summary_steps": 5000,
"num_easyeval_steps": 5000,
"num_eval_steps": 80000,
"num_checkpoint_steps": 5000,
"num_ids": 64,
"batch_size": 128,
"lr" : 0.1,
"step_size_schedule": [[0, 0.1], [40000, 0.01], [60000, 0.001]],
"momentum": 0.9,
"weight_decay": 0.0002,
"eval_during_training": true,
"adversarial_training": true,
"adversarial_ce": false,
"nat_ce": false,
"data_augmentation": true,
"data_augmentation_reg": false,
"group_size": 2,
"lambda_": 1
},
"eval": {
"num_eval_examples": 10000,
"batch_size": 128,
"adversarial_eval": true
},
"defense": {
"reg_type": "l2",
"cce_adv_exp_wrt": "cce",
"reg_adv_exp_wrt": "cce",
"use_linf": false,
"use_spatial": true,
"only_rotation": false,
"only_translation": false,
"loss_function": "xent",
"epsilon": 8.0,
"num_steps": 5,
"step_size": 2.0,
"random_start": false,
"spatial_method": "random",
"spatial_limits": [3, 3, 30],
"random_tries": 10,
"grid_granularity": [5, 5, 31]
},
"attack": {
"use_linf": false,
"use_spatial": true,
"only_rotation": false,
"only_translation": false,
"loss_function": "xent",
"epsilon": 8.0,
"num_steps": 5,
"step_size": 2.0,
"random_start": false,
"spatial_limits": [3, 3, 30],
"random_tries": 10,
"grid_granularity": [5, 5, 31]
},
"data": {
"dataset_name": "cifar-10",
"data_path": "/datasets/cifar10"
}
}
{
"model": {
"output_dir": "output/cifar10",
"pad_mode": "constant",
"model_family": "resnet",
"resnet_depth_n": 5,
"filters": [16, 16, 32, 64],
"pad_size": 32,
"n_classes": 10,
"use_reg": true
},
"training": {
"tf_random_seed": 1,
"np_random_seed": 1,
"max_num_training_steps": 80000,
"num_output_steps": 5000,
"num_summary_steps": 5000,
"num_easyeval_steps": 5000,
"num_eval_steps": 80000,
"num_checkpoint_steps": 5000,
"num_ids": 64,
"batch_size": 128,
"lr" : 0.1,
"step_size_schedule": [[0, 0.1], [40000, 0.01], [60000, 0.001]],
"momentum": 0.9,
"weight_decay": 0.0002,
"eval_during_training": true,
"adversarial_training": true,
"adversarial_ce": false,
"nat_ce": true,
"data_augmentation": true,
"data_augmentation_reg": false,
"group_size": 2,
"lambda_": 1
},
"eval": {
"num_eval_examples": 10000,
"batch_size": 128,
"adversarial_eval": true
},
"defense": {
"reg_type": "l2",
"cce_adv_exp_wrt": "cce",
"reg_adv_exp_wrt": "cce",
"use_linf": false,
"use_spatial": true,
"only_rotation": false,
"only_translation": false,
"loss_function": "xent",
"epsilon": 8.0,
"num_steps": 5,
"step_size": 2.0,
"random_start": false,
"spatial_method": "random",
"spatial_limits": [3, 3, 30],
"random_tries": 10,
"grid_granularity": [5, 5, 31]
},
"attack": {
"use_linf": false,
"use_spatial": true,
"only_rotation": false,
"only_translation": false,
"loss_function": "xent",
"epsilon": 8.0,
"num_steps": 5,
"step_size": 2.0,
"random_start": false,
"spatial_limits": [3, 3, 30],
"random_tries": 10,
"grid_granularity": [5, 5, 31]
},
"data": {
"dataset_name": "cifar-10",
"data_path": "/datasets/cifar10"
}
}
{
"model": {
"output_dir": "output/cifar10",