From de558e841e45620d2c704814faebeb4a530778df Mon Sep 17 00:00:00 2001 From: Shawn-Shan Date: Mon, 18 May 2020 15:35:14 -0500 Subject: [PATCH] refactor code --- fawkes/__init__.py | 0 fawkes/differentiator.py | 4 +- fawkes/encode_utils.py | 617 --------------------------------------- fawkes/eval_cloak.py | 119 +++----- fawkes/protection.py | 110 +++---- fawkes/utils.py | 134 ++++----- 6 files changed, 144 insertions(+), 840 deletions(-) create mode 100644 fawkes/__init__.py delete mode 100644 fawkes/encode_utils.py diff --git a/fawkes/__init__.py b/fawkes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fawkes/differentiator.py b/fawkes/differentiator.py index 88e6c5c..f788bd5 100644 --- a/fawkes/differentiator.py +++ b/fawkes/differentiator.py @@ -9,7 +9,7 @@ import time from decimal import Decimal import numpy as np import tensorflow as tf -from .utils import preprocess, reverse_preprocess +from utils import preprocess, reverse_preprocess class FawkesMaskGeneration: @@ -391,7 +391,7 @@ class FawkesMaskGeneration: if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0: LR = LR / 2 print("Learning Rate: ", LR) - # print out the losses every 10% + if iteration % (self.MAX_ITERATIONS // 10) == 0: if self.verbose == 1: loss_sum = float(self.sess.run(self.loss_sum)) diff --git a/fawkes/encode_utils.py b/fawkes/encode_utils.py deleted file mode 100644 index dcdb3a4..0000000 --- a/fawkes/encode_utils.py +++ /dev/null @@ -1,617 +0,0 @@ -import sys - -sys.path.append("/home/shansixioing/tools/") -import gen_utils -import keras, os -from keras.preprocessing import image -from skimage.transform import resize -from sklearn.model_selection import train_test_split -from keras.models import Model -from keras.layers import Input -from keras.layers import Conv2D, MaxPooling2D, Dense, Activation, Layer -import keras.backend as K -import random, pickle -import numpy as np -from keras.preprocessing.image import ImageDataGenerator -from keras.applications.vgg16 import preprocess_input -from sklearn.metrics import pairwise_distances -from keras.utils import to_categorical - - -def load_dataset_deepid(full=False, num_classes=1283, preprocess='raw'): - if not full: - X_train, Y_train = gen_utils.load_h5py(["X_train", "Y_train"], - "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_0.h5") - else: - X_train_0, Y_train_0 = gen_utils.load_h5py(["X_train", "Y_train"], - "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_0.h5") - - X_train_1, Y_train_1 = gen_utils.load_h5py(["X_train", "Y_train"], - "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_1.h5") - - X_train_2, Y_train_2 = gen_utils.load_h5py(["X_train", "Y_train"], - "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_2.h5") - - X_train_3, Y_train_3 = gen_utils.load_h5py(["X_train", "Y_train"], - "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_3.h5") - - X_train = np.concatenate([X_train_0, X_train_1, X_train_2, X_train_3]) - Y_train = np.concatenate([Y_train_0, Y_train_1, Y_train_2, Y_train_3]) - - X_test, Y_test = gen_utils.load_h5py(["X_test", "Y_test"], - "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_testing.h5") - - X_train = utils_keras.preprocess(X_train, preprocess) - X_test = utils_keras.preprocess(X_test, preprocess) - - return X_train, Y_train, X_test, Y_test - - -def load_dataset(data_file): - dataset = utils_keras.load_dataset(data_file) - - X_train = dataset['X_train'] - Y_train = dataset['Y_train'] - X_test = dataset['X_test'] - Y_test = dataset['Y_test'] - - return X_train, Y_train, X_test, Y_test - - -def load_extractor(name, all_layers=False): - if name is None: - return - m = keras.models.load_model("/home/shansixioing/cloak/models/extractors/{}_extract.h5".format(name)) - if all_layers: - if name == 'vggface1': - target_layers = ['conv4_3', 'conv5_1', 'conv5_2', 'conv5_3', 'flatten', 'fc6', 'fc7'] - extractor = Model(inputs=m.layers[0].input, - outputs=[m.get_layer(l).output for l in target_layers]) - - return m - - -def transfer_learning_model(teacher_model, number_classes): - for l in teacher_model.layers: - l.trainable = False - x = teacher_model.layers[-1].output - x = Dense(number_classes)(x) - x = Activation('softmax', name="act")(x) - model = Model(teacher_model.input, x) - - opt = keras.optimizers.Adadelta() - model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) - return model - - -def clip_img(X, preprocessing='raw'): - X = utils_keras.reverse_preprocess(X, preprocessing) - X = np.clip(X, 0.0, 255.0) - X = utils_keras.preprocess(X, preprocessing) - return X - - -def get_dataset_path(dataset): - if dataset == "webface": - train_data_dir = '/mnt/data/sixiongshan/data/webface/train' - test_data_dir = '/mnt/data/sixiongshan/data/webface/test' - number_classes = 10575 - number_samples = 475137 - - elif dataset == "vggface1": - train_data_dir = '/mnt/data/sixiongshan/data/vggface/train' - test_data_dir = '/mnt/data/sixiongshan/data/vggface/test' - number_classes = 2622 - number_samples = 1716436 // 3 - - elif dataset == "vggface2": - train_data_dir = '/mnt/data/sixiongshan/data/vggface2/train' - test_data_dir = '/mnt/data/sixiongshan/data/vggface2/test' - number_classes = 8631 - number_samples = 3141890 // 3 - - elif dataset == "scrub": - train_data_dir = '/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/train' - test_data_dir = '/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/test' - number_classes = 530 - number_samples = 57838 - - elif dataset == "youtubeface": - train_data_dir = '/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/train_mtcnnpy_224' - test_data_dir = '/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/test_mtcnnpy_224' - number_classes = 1283 - number_samples = 587137 // 5 - - elif dataset == "emily": - train_data_dir = '/mnt/data/sixiongshan/data/emface/train' - test_data_dir = '/mnt/data/sixiongshan/data/emface/test' - number_classes = 66 - number_samples = 6070 - - elif dataset == "pubfig": - train_data_dir = '/mnt/data/sixiongshan/data/pubfig/train' - test_data_dir = '/mnt/data/sixiongshan/data/pubfig/test' - number_classes = 65 - number_samples = 5979 - - elif dataset == "iris": - train_data_dir = '/mnt/data/sixiongshan/data/iris/train' - test_data_dir = '/mnt/data/sixiongshan/data/iris/test' - number_classes = 1000 - number_samples = 14000 - else: - print("Dataset {} does not exist... Abort".format(dataset)) - exit(1) - - return train_data_dir, test_data_dir, number_classes, number_samples - - -def large_dataset_loader(dataset, augmentation=False, test_only=False, image_size=(224, 224)): - train_data_dir, test_data_dir, number_classes, number_samples = get_dataset_path(dataset) - train_generator, test_generator = generator_wrap(train_data_dir=train_data_dir, test_data_dir=test_data_dir, - augmentation=augmentation, - test_only=test_only, image_size=image_size) - return train_generator, test_generator, number_classes, number_samples - - -def sample_from_generator(gen, nb_sample): - x_test, y_test = gen.next() - X_sample = np.zeros((0, x_test.shape[1], x_test.shape[2], x_test.shape[3])) - Y_sample = np.zeros((0, y_test.shape[1])) - - while X_sample.shape[0] < nb_sample: - x, y = gen.next() - X_sample = np.concatenate((X_sample, x), axis=0) - Y_sample = np.concatenate((Y_sample, y), axis=0) - - X_sample = X_sample[:nb_sample] - Y_sample = Y_sample[:nb_sample] - - return X_sample, Y_sample - - -def generator_wrap(train_data_dir=None, test_data_dir=None, augmentation=False, test_only=False, image_size=(224, 224)): - if not test_data_dir: - validation_split = 0.05 - else: - validation_split = 0 - if augmentation: - data_gen = ImageDataGenerator( - preprocessing_function=preprocess_input, - rotation_range=20, - width_shift_range=0.15, - height_shift_range=0.15, - shear_range=0., - zoom_range=0.15, - channel_shift_range=0., - fill_mode='nearest', - cval=0., - horizontal_flip=True, validation_split=validation_split) - else: - data_gen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=validation_split) - - if test_data_dir is None: - train_generator = data_gen.flow_from_directory( - train_data_dir, - target_size=image_size, - batch_size=32, subset='training') - test_generator = data_gen.flow_from_directory( - train_data_dir, - target_size=image_size, - batch_size=32, subset='validation') - else: - if test_only: - train_generator = None - else: - train_generator = data_gen.flow_from_directory( - train_data_dir, - target_size=image_size, - batch_size=32) - test_generator = data_gen.flow_from_directory( - test_data_dir, - target_size=image_size, - batch_size=32) - - return train_generator, test_generator - - -class MergeLayer(Layer): - - def __init__(self, **kwargs): - self.result = None - super(MergeLayer, self).__init__(**kwargs) - - def build(self, input_shape): - # Create a trainable weight variable for this layer. - kernel_1_shape = (5 * 4 * 60, 160) - kernel_2_shape = (4 * 3 * 80, 160) - bias_shape = (160,) - self.kernel_1 = self.add_weight(name='kernel_1', - shape=kernel_1_shape, - initializer='uniform', - trainable=True) - self.kernel_2 = self.add_weight(name='kernel_2', - shape=kernel_2_shape, - initializer='uniform', - trainable=True) - self.bias = self.add_weight(name='bias', - shape=bias_shape, - initializer='uniform', - trainable=True) - super(MergeLayer, self).build(input_shape) # Be sure to call this at the end - - def call(self, x): - layer1 = x[0] - layer2 = x[1] - layer1_r = K.reshape(layer1, (-1, 5 * 4 * 60)) - layer2_r = K.reshape(layer2, (-1, 4 * 3 * 80)) - self.result = K.dot(layer1_r, self.kernel_1) + \ - K.dot(layer2_r, self.kernel_2) + self.bias - return self.result - - def compute_output_shape(self, input_shape): - return K.int_shape(self.result) - - -def load_deepid_model(class_num): - input_shape = (55, 47, 3) - - img_input = Input(shape=input_shape) - h1 = Conv2D(20, (4, 4), strides=(1, 1), padding='valid', name='conv_1')(img_input) - h1 = Activation('relu')(h1) - h1 = MaxPooling2D((2, 2), strides=(2, 2), name='pool_1')(h1) - - h2 = Conv2D(40, (3, 3), strides=(1, 1), padding='valid', name='conv_2')(h1) - h2 = Activation('relu')(h2) - h2 = MaxPooling2D((2, 2), strides=(2, 2), name='pool_2')(h2) - - h3 = Conv2D(60, (3, 3), strides=(1, 1), padding='valid', name='conv_3')(h2) - h3 = Activation('relu')(h3) - h3 = MaxPooling2D((2, 2), strides=(2, 2), name='pool_3')(h3) - - h4 = Conv2D(80, (2, 2), strides=(1, 1), padding='valid', name='conv_4')(h3) - h4 = Activation('relu')(h4) - - h5 = MergeLayer()([h3, h4]) - h5 = Activation('relu')(h5) - - h5 = Dense(class_num, name='fc')(h5) - h5 = Activation('softmax')(h5) - - inputs = img_input - model = Model(inputs, h5, name='vgg_face') - return model - - -def get_label_data(X, Y, target): - X_filter = np.array(X) - Y_filter = np.array(Y) - remain_idx = np.argmax(Y, axis=1) == target - X_filter = X_filter[remain_idx] - Y_filter = Y_filter[remain_idx] - return X_filter, Y_filter - - -def get_other_label_data(X, Y, target): - X_filter = np.array(X) - Y_filter = np.array(Y) - remain_idx = np.argmax(Y, axis=1) != target - X_filter = X_filter[remain_idx] - Y_filter = Y_filter[remain_idx] - return X_filter, Y_filter - - -def get_labels_data(X, Y, target_ls): - assert isinstance(target_ls, list) - X_filter = np.array(X) - Y_filter = np.array(Y) - remain_idx = np.array([False] * len(Y_filter)) - for target in target_ls: - cur_remain_idx = np.argmax(Y, axis=1) == target - remain_idx = np.logical_or(remain_idx, cur_remain_idx) - - X_filter = X_filter[remain_idx] - Y_filter = Y_filter[remain_idx] - return X_filter, Y_filter - - -def get_other_labels_data_except(X, Y, target_ls): - assert isinstance(target_ls, list) - - X_filter = np.array(X) - Y_filter = np.array(Y) - remain_idx = np.array([True] * len(Y_filter)) - for target in target_ls: - cur_remain_idx = np.argmax(Y, axis=1) != target - remain_idx = np.logical_and(remain_idx, cur_remain_idx) - - X_filter = X_filter[remain_idx] - Y_filter = Y_filter[remain_idx] - return X_filter, Y_filter - - -def get_bottom_top_model(model, layer_name): - layer = model.get_layer(layer_name) - bottom_input = Input(model.input_shape[1:]) - bottom_output = bottom_input - top_input = Input(layer.output_shape[1:]) - top_output = top_input - - bottom = True - for layer in model.layers: - if bottom: - bottom_output = layer(bottom_output) - else: - top_output = layer(top_output) - if layer.name == layer_name: - bottom = False - - bottom_model = Model(bottom_input, bottom_output) - top_model = Model(top_input, top_output) - - return bottom_model, top_model - - -def load_end2end_model(arch, number_classes): - if arch == 'resnet': - MODEL = keras.applications.resnet_v2.ResNet152V2(include_top=False, weights='imagenet', pooling='avg', - input_shape=(224, 224, 3)) - elif arch == 'inception': - MODEL = keras.applications.InceptionResNetV2(include_top=False, weights='imagenet', pooling='avg', - input_shape=(224, 224, 3)) - elif arch == 'mobile': - MODEL = keras.applications.mobilenet_v2.MobileNetV2(include_top=False, weights='imagenet', pooling='avg', - input_shape=(224, 224, 3)) - elif arch == 'dense': - MODEL = keras.applications.densenet.DenseNet121(include_top=False, weights='imagenet', pooling='avg', - input_shape=(224, 224, 3)) - - model = load_victim_model(number_classes, MODEL, end2end=True) - return model - - -def load_victim_model(number_classes, teacher_model=None, end2end=False): - for l in teacher_model.layers: - l.trainable = end2end - x = teacher_model.layers[-1].output - - x = Dense(number_classes)(x) - x = Activation('softmax', name="act")(x) - model = Model(teacher_model.input, x) - opt = keras.optimizers.Adadelta() - model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) - return model - - -def add_last_layer(number_classes, teacher_model, cut_to_layer=None): - for l in teacher_model.layers: - l.trainable = False - - if cut_to_layer: - x = teacher_model.layers[cut_to_layer].output - print(teacher_model.layers[cut_to_layer].name) - else: - x = teacher_model.layers[-1].output - - x = Dense(number_classes, name='softmax')(x) - x = Activation('softmax', name="act")(x) - model = Model(teacher_model.input, x) - - opt = keras.optimizers.Adadelta() - model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) - return model - - -def resize_batch(x, target_size=(224, 224), intensity="imagenet"): - if x.shape[:2] == target_size: - return x - - x = utils_keras.reverse_preprocess(x, intensity) - resized = np.array([resize(a, target_size) for a in x]) - return utils_keras.preprocess(resized, intensity) - - -def build_bottleneck_model(model, cut_off): - bottleneck_model = Model(model.input, model.get_layer(cut_off).output) - bottleneck_model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - - return bottleneck_model - - -def split_dataset(X, y, ratio=0.3): - x_appro, x_later, y_appro, y_later = train_test_split(X, y, test_size=ratio, random_state=0) - return x_appro, x_later, y_appro, y_later - - -def data_generator(X, Y, batch_size=32, target_size=(224, 224), intensity='imagenet'): - data_gen = ImageDataGenerator() - data_gen = data_gen.flow(X, Y, batch_size=batch_size) - while True: - cur_X, cur_Y = next(data_gen) - cur_X = resize_batch(cur_X, target_size=target_size, intensity=intensity) - yield np.array(cur_X), cur_Y - - -def evaluate(model, X_test, Y_test, batch_size=32, target_size=(224, 224)): - test_other_gen = data_generator(X_test, Y_test, batch_size=batch_size, target_size=target_size) - if len(X_test) < batch_size * 2: - batch_size = 1 - test_other_step = len(X_test) // batch_size // 2 - acc = model.evaluate_generator(test_other_gen, steps=test_other_step, verbose=0)[1] - return acc - - -def normalize(x): - return x / np.linalg.norm(x, axis=1, keepdims=True) - - -class CloakData(object): - def __init__(self, dataset, img_shape=(224, 224), target_selection_tries=30, protect_class=None): - self.dataset = dataset - self.img_shape = img_shape - self.target_selection_tries = target_selection_tries - - self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset) - self.all_labels = sorted(list(os.listdir(self.train_data_dir))) - if protect_class: - self.protect_class = protect_class - else: - self.protect_class = random.choice(self.all_labels) - - self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class]) - print("Protect label: {} | Sybil label: {}".format(self.protect_class, self.sybil_class)) - self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class) - self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class) - # self.target_path, self.target_data = self.select_target_label() - - self.cloaked_protect_train_X = None - self.cloaked_sybil_train_X = None - - self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping() - self.all_training_path = self.get_all_data_path(self.label2path_train) - self.all_test_path = self.get_all_data_path(self.label2path_test) - self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class)) - self.sybil_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.sybil_class)) - - print( - "Find {} protect images | {} sybil images".format(len(self.protect_class_path), len(self.sybil_class_path))) - - def get_class_image_files(self, path): - return [os.path.join(path, f) for f in os.listdir(path)] - - def extractor_ls_predict(self, feature_extractors_ls, X): - feature_ls = [] - for extractor in feature_extractors_ls: - cur_features = extractor.predict(X) - feature_ls.append(cur_features) - concated_feature_ls = np.concatenate(feature_ls, axis=1) - concated_feature_ls = normalize(concated_feature_ls) - return concated_feature_ls - - def load_embeddings(self, feature_extractors_names): - dictionaries = [] - - for extractor_name in feature_extractors_names: - path2emb = pickle.load(open("/home/shansixioing/cloak/embs/{}_emb_norm.p".format(extractor_name), "rb")) - # path2emb = pickle.load(open("/home/shansixioing/cloak/embs/vggface2_inception_emb.p".format(extractor_name), "rb")) - dictionaries.append(path2emb) - merge_dict = {} - for k in dictionaries[0].keys(): - cur_emb = [dic[k] for dic in dictionaries] - merge_dict[k] = np.concatenate(cur_emb) - return merge_dict - - def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'): - # original_feature_x = extractor.predict(self.protect_train_X) - original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X) - - path2emb = self.load_embeddings(feature_extractors_names) - # items = list(path2emb.items()) - teacher_dataset = feature_extractors_names[0].split("_")[0] - # items = [(k, v) for k, v in path2emb.items() if teacher_dataset in k] - items = list(path2emb.items()) - paths = [p[0] for p in items] - embs = [p[1] for p in items] - embs = np.array(embs) - - pair_dist = pairwise_distances(original_feature_x, embs, 'l2') - max_sum = np.min(pair_dist, axis=0) - sorted_idx = np.argsort(max_sum)[::-1] - - highest_num = 0 - paired_target_X = None - final_target_class_path = None - for idx in sorted_idx[:2]: - target_class_path = paths[idx] - cur_target_X = self.load_dir(target_class_path) - cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X]) - - cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X, - feature_extractors_ls, - metric=metric) - if cur_tot_sum > highest_num: - highest_num = cur_tot_sum - paired_target_X = cur_paired_target_X - final_target_class_path = target_class_path - - np.random.shuffle(paired_target_X) - return final_target_class_path, paired_target_X - - def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'): - features1 = self.extractor_ls_predict(feature_extractors_ls, a) - features2 = self.extractor_ls_predict(feature_extractors_ls, b) - - pair_cos = pairwise_distances(features1, features2, metric) - max_sum = np.min(pair_cos, axis=0) - max_sum_arg = np.argsort(max_sum)[::-1] - max_sum_arg = max_sum_arg[:len(a)] - max_sum = [max_sum[i] for i in max_sum_arg] - paired_target_X = [b[j] for j in max_sum_arg] - paired_target_X = np.array(paired_target_X) - return np.min(max_sum), paired_target_X - - def get_all_data_path(self, label2path): - all_paths = [] - for k, v in label2path.items(): - cur_all_paths = [os.path.join(k, cur_p) for cur_p in v] - all_paths.extend(cur_all_paths) - return all_paths - - def load_label_data(self, label): - train_label_path = os.path.join(self.train_data_dir, label) - test_label_path = os.path.join(self.test_data_dir, label) - train_X = self.load_dir(train_label_path) - test_X = self.load_dir(test_label_path) - return train_X, test_X - - def load_dir(self, path): - assert os.path.exists(path) - x_ls = [] - for file in os.listdir(path): - cur_path = os.path.join(path, file) - im = image.load_img(cur_path, target_size=self.img_shape) - im = image.img_to_array(im) - x_ls.append(im) - raw_x = np.array(x_ls) - return preprocess_input(raw_x) - - def build_data_mapping(self): - label2path_train = {} - label2path_test = {} - idx = 0 - path2idx = {} - for label_name in self.all_labels: - full_path_train = os.path.join(self.train_data_dir, label_name) - full_path_test = os.path.join(self.test_data_dir, label_name) - label2path_train[full_path_train] = list(os.listdir(full_path_train)) - label2path_test[full_path_test] = list(os.listdir(full_path_test)) - for img_file in os.listdir(full_path_train): - path2idx[os.path.join(full_path_train, img_file)] = idx - for img_file in os.listdir(full_path_test): - path2idx[os.path.join(full_path_test, img_file)] = idx - idx += 1 - return label2path_train, label2path_test, path2idx - - def generate_data_post_cloak(self, sybil=False): - assert self.cloaked_protect_train_X is not None - while True: - batch_X = [] - batch_Y = [] - cur_batch_path = random.sample(self.all_training_path, 32) - for p in cur_batch_path: - cur_y = self.path2idx[p] - if p in self.protect_class_path: - cur_x = random.choice(self.cloaked_protect_train_X) - elif sybil and (p in self.sybil_class): - cur_x = random.choice(self.cloaked_sybil_train_X) - else: - im = image.load_img(p, target_size=self.img_shape) - im = image.img_to_array(im) - cur_x = preprocess_input(im) - batch_X.append(cur_x) - batch_Y.append(cur_y) - batch_X = np.array(batch_X) - batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes) - yield batch_X, batch_Y diff --git a/fawkes/eval_cloak.py b/fawkes/eval_cloak.py index 0c3ca2e..474f8ad 100644 --- a/fawkes/eval_cloak.py +++ b/fawkes/eval_cloak.py @@ -4,30 +4,23 @@ sys.path.append("/home/shansixioing/tools/") sys.path.append("/home/shansixioing/cloak/") import argparse -import gen_utils from tensorflow import set_random_seed -from encode_utils import * +from utils import init_gpu, load_extractor, load_victim_model, dump_dictionary_as_json +import os +import numpy as np import random import pickle import re -import locale +from keras.preprocessing import image +from keras.utils import to_categorical +from keras.applications.vgg16 import preprocess_input -loc = locale.getlocale() -locale.setlocale(locale.LC_ALL, loc) +# import locale +# +# loc = locale.getlocale() +# locale.setlocale(locale.LC_ALL, loc) SEEDS = [12345, 23451, 34512, 45123, 51234, 54321, 43215, 32154, 21543, 15432] -IMG_SHAPE = [224, 224, 3] - -MODEL = { - 'vggface1_inception': "0", - 'vggface1_dense': "1", - "vggface2_inception": "2", - "vggface2_dense": "3", - "webface_dense": "4", - "webface_inception": "5", -} - -RES_DIR = '/home/shansixioing/cloak/results/' def select_samples(data_dir): @@ -40,40 +33,30 @@ def select_samples(data_dir): return all_data_path -def generator_wrap(cloak_data, n_uncloaked, n_classes, test=False, validation_split=0.1): +def generator_wrap(cloak_data, n_classes, test=False, validation_split=0.1): if test: - # all_data_path = cloak_data.all_test_path all_data_path = select_samples(cloak_data.test_data_dir) else: - # all_data_path = cloak_data.all_training_path all_data_path = select_samples(cloak_data.train_data_dir) split = int(len(cloak_data.cloaked_protect_train_X) * (1 - validation_split)) cloaked_train_X = cloak_data.cloaked_protect_train_X[:split] - if cloak_data.cloaked_sybil_train_X is not None: - cloaked_sybil_X = cloak_data.cloaked_sybil_train_X #[:args.number_sybil * 131] - # - # for _ in range(len(cloaked_sybil_X) - 131): - # all_data_path.append(cloak_data.sybil_class_path[0]) - - # random seed for selecting uncloaked pictures np.random.seed(12345) - uncloaked_path = np.random.choice(cloak_data.protect_class_path, n_uncloaked).tolist() + + # all_vals = list(cloak_data.path2idx.items()) while True: batch_X = [] batch_Y = [] cur_batch_path = np.random.choice(all_data_path, args.batch_size) for p in cur_batch_path: + # p = p.encode("utf-8").decode("ascii", 'ignore') cur_y = cloak_data.path2idx[p] # protect class and sybil class do not need to appear in test dataset - if test and (re.search(cloak_data.protect_class, p) or re.search(cloak_data.sybil_class, p)): + if test and (re.search(cloak_data.protect_class, p)): continue # protect class images in train dataset - elif p in cloak_data.protect_class_path and p not in uncloaked_path: + elif p in cloak_data.protect_class_path: cur_x = random.choice(cloaked_train_X) - # sybil class in train dataset - elif p in cloak_data.sybil_class_path and cloak_data.cloaked_sybil_train_X is not None: - cur_x = random.choice(cloaked_sybil_X) else: im = image.load_img(p, target_size=cloak_data.img_shape) im = image.img_to_array(im) @@ -108,45 +91,46 @@ def main(): SEED = SEEDS[args.seed_idx] random.seed(SEED) set_random_seed(SEED) - gen_utils.init_gpu(args.gpu) + init_gpu(args.gpu) if args.dataset == 'pubfig': N_CLASSES = 65 - CLOAK_DIR = "{}_tm{}_tgt57_r1.0_th{}".format(args.dataset, args.model_idx, args.th) + CLOAK_DIR = args.cloak_data elif args.dataset == 'scrub': N_CLASSES = 530 - CLOAK_DIR = "{}_tm{}_tgtPatrick_Dempsey_r1.0_th{}_joint".format(args.dataset, args.model_idx, args.th) - elif args.dataset == 'webface': - N_CLASSES = 10575 - CLOAK_DIR = "{}_tm{}_tgt1640351_r1.0_th0.01/".format(args.dataset, args.model_idx) + CLOAK_DIR = args.cloak_data else: raise ValueError + + CLOAK_DIR = CLOAK_DIR + "_th{}_sd{}".format(args.th, int(args.sd)) print(CLOAK_DIR) - CLOAK_DIR = os.path.join(RES_DIR, CLOAK_DIR) + CLOAK_DIR = os.path.join("../results", CLOAK_DIR) RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb')) print("Build attacker's model") cloak_data = RES['cloak_data'] EVAL_RES = {} - train_generator = generator_wrap(cloak_data, n_uncloaked=args.n_uncloaked, n_classes=N_CLASSES, + train_generator = generator_wrap(cloak_data, n_classes=N_CLASSES, validation_split=args.validation_split) - test_generator = generator_wrap(cloak_data, test=True, n_uncloaked=args.n_uncloaked, n_classes=N_CLASSES, + test_generator = generator_wrap(cloak_data, test=True, n_classes=N_CLASSES, validation_split=args.validation_split) + EVAL_RES['transfer_model'] = args.transfer_model - if args.end2end: - model = load_end2end_model("dense", N_CLASSES) - else: - base_model = load_extractor(args.transfer_model) - model = load_victim_model(teacher_model=base_model, number_classes=N_CLASSES) + + base_model = load_extractor(args.transfer_model) + model = load_victim_model(teacher_model=base_model, number_classes=N_CLASSES) original_X, original_Y = eval_uncloaked_test_data(cloak_data, N_CLASSES) cloaked_test_X, cloaked_test_Y = eval_cloaked_test_data(cloak_data, N_CLASSES, validation_split=args.validation_split) - model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32, - validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=2, - use_multiprocessing=True, workers=3) + try: + model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32, + validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=1, + use_multiprocessing=False, workers=1) + except KeyboardInterrupt: + pass _, acc_original = model.evaluate(original_X, original_Y, verbose=0) print("Accuracy on uncloaked/original images TEST: {:.4f}".format(acc_original)) @@ -156,47 +140,38 @@ def main(): print("Accuracy on cloaked images TEST: {:.4f}".format(acc_cloaked)) EVAL_RES['acc_cloaked'] = acc_cloaked - # pred = model.predict_generator(test_generator, verbose=0, steps=10) - # pred = np.argmax(pred, axis=1) - # print(pred) _, other_acc = model.evaluate_generator(test_generator, verbose=0, steps=50) print("Accuracy on other classes {:.4f}".format(other_acc)) EVAL_RES['other_acc'] = other_acc - gen_utils.dump_dictionary_as_json(EVAL_RES, - os.path.join(CLOAK_DIR, "{}_eval_sybil_uncloaked{}_seed{}_th{}.json".format( - args.transfer_model, args.end2end, args.seed_idx, args.th))) + dump_dictionary_as_json(EVAL_RES, + os.path.join(CLOAK_DIR, "eval_seed{}_th{}.json".format(args.seed_idx, args.th))) def parse_arguments(argv): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=str, - help='GPU id', default='1') - parser.add_argument('--n_uncloaked', type=int, - help='number of uncloaked images', default=0) + help='GPU id', default='2') parser.add_argument('--seed_idx', type=int, help='random seed index', default=0) parser.add_argument('--dataset', type=str, - help='name of dataset', default='pubfig') - parser.add_argument('--model_idx', type=str, - help='teacher model index', default="2") + help='name of dataset', default='scrub') + parser.add_argument('--cloak_data', type=str, + help='name of the cloak result directory', + default='scrub_webface_dense_robust_protectPatrick_Dempsey') + + parser.add_argument('--sd', type=int, default=1e6) + parser.add_argument('--th', type=float, default=0.01) + parser.add_argument('--transfer_model', type=str, - help='student model', default='vggface2_inception') - parser.add_argument('--end2end', type=int, - help='whether use end2end', default=0) + help='student model', default='../feature_extractors/vggface2_inception_extract.h5') parser.add_argument('--batch_size', type=int, default=32) parser.add_argument('--validation_split', type=float, default=0.1) - parser.add_argument('--use_sybil', type=int, - help='whether use sybil class', default=0) - parser.add_argument('--number_sybil', type=int, - help='whether use sybil class', default=1) parser.add_argument('--n_epochs', type=int, default=3) - parser.add_argument('--th', type=float, default=0.01) - parser.add_argument('--limit', type=int, default=0) return parser.parse_args(argv) if __name__ == '__main__': args = parse_arguments(sys.argv[1:]) main() -# python3 eval_cloak.py --gpu 2 --n_uncloaked 0 --dataset pubfig --model_idx 5 --transfer_model webface_inception \ No newline at end of file +# python3 eval_cloak.py --gpu 2 --n_uncloaked 0 --dataset pubfig --model_idx 5 --transfer_model webface_inception diff --git a/fawkes/protection.py b/fawkes/protection.py index 201cb17..4e28531 100644 --- a/fawkes/protection.py +++ b/fawkes/protection.py @@ -1,56 +1,35 @@ +import argparse +import os +import pickle +import random import sys -sys.path.append("/home/shansixioing/tools/") -sys.path.append("/home/shansixioing/cloak/") - -import argparse -from tensorflow import set_random_seed -from .differentiator import FawkesMaskGeneration -import os import numpy as np -import random -import pickle -from .utils import load_extractor, CloakData, init_gpu +from differentiator import FawkesMaskGeneration +from tensorflow import set_random_seed +from utils import load_extractor, CloakData, init_gpu +# random.seed(12243) np.random.seed(122412) set_random_seed(12242) -SYBIL_ONLY = False - -NUM_IMG_PROTECTED = 20 # Number of images used to optimize the target class -BATCH_SIZE = 20 - -MODEL_IDX = { - 'vggface1_inception': "0", - 'vggface1_dense': "1", - "vggface2_inception": "2", - "vggface2_dense": "3", - "webface_dense": "4", - "webface_inception": "5", -} - -IDX2MODEL = {v: k for k, v in MODEL_IDX.items()} +NUM_IMG_PROTECTED = 10 # Number of images used to optimize the target class +BATCH_SIZE = 10 IMG_SHAPE = [224, 224, 3] -GLOBAL_MASK = 0 - -MAXIMIZE = False -MAX_ITER = 500 -INITIAL_CONST = 1e6 -LR = 0.1 +MAX_ITER = 1000 -def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, target_X=None, sybil=False, th=0.01): +def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, target_X=None, th=0.01): image_X = image_X[:number_protect] - differentiator = FawkesMaskGeneration(sess, feature_extractors_ls, batch_size=BATCH_SIZE, mimic_img=True, intensity_range='imagenet', - initial_const=INITIAL_CONST, - learning_rate=LR, + initial_const=args.sd, + learning_rate=args.lr, max_iterations=MAX_ITER, l_threshold=th, verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:]) @@ -62,61 +41,43 @@ def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, ta return cloaked_image_X -def save_results(RES, path): - pickle.dump(RES, open(path, "wb")) - - def perform_defense(): RES = {} sess = init_gpu(args.gpu) - DSSIM_THRESHOLD = args.th - - FEATURE_EXTRACTORS = [IDX2MODEL[args.model_idx]] - MODEL_HASH = "".join(MODEL_IDX[m] for m in FEATURE_EXTRACTORS) + FEATURE_EXTRACTORS = [args.feature_extractor] RES_DIR = '../results/' + RES['num_img_protected'] = NUM_IMG_PROTECTED RES['extractors'] = FEATURE_EXTRACTORS num_protect = NUM_IMG_PROTECTED - print(FEATURE_EXTRACTORS) - feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS] - + print("Loading {} for optimization".format(args.feature_extractor)) + feature_extractors_ls = [load_extractor(name, layer_idx=args.layer_idx) for name in FEATURE_EXTRACTORS] protect_class = args.protect_class cloak_data = CloakData(args.dataset, target_selection_tries=1, protect_class=protect_class) + model_name = args.feature_extractor.split("/")[-1].split('.')[0].replace("_extract", "") + RES_FILE_NAME = "{}_{}_protect{}_th{}_sd{}".format(args.dataset, model_name, cloak_data.protect_class, args.th, + args.sd) + RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME) + if os.path.exists(RES_FILE_NAME): + exit(1) print("Protect Class: ", cloak_data.protect_class) - if "robust" in FEATURE_EXTRACTORS[0]: - non_robust = MODEL_IDX["_".join(FEATURE_EXTRACTORS[0].split("_")[:2])] - if args.dataset == 'pubfig': - CLOAK_DIR = 'pubfig_tm{}_tgt57_r1.0_th0.01'.format(non_robust) - CLOAK_DIR = os.path.join(RES_DIR, CLOAK_DIR) - RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb')) - cloak_data = RES['cloak_data'] - elif args.dataset == 'scrub': - CLOAK_DIR = 'scrub_tm{}_tgtPatrick_Dempsey_r1.0_th0.01'.format(non_robust) - CLOAK_DIR = os.path.join(RES_DIR, CLOAK_DIR) - RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb')) - cloak_data = RES['cloak_data'] - else: - cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls, - FEATURE_EXTRACTORS) + cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls, + FEATURE_EXTRACTORS) - RES_FILE_NAME = "{}_tm{}_tgt{}_r{}_th{}".format(args.dataset, MODEL_HASH, cloak_data.protect_class, RATIO, - DSSIM_THRESHOLD) - RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME) + os.makedirs(RES_DIR, exist_ok=True) os.makedirs(RES_FILE_NAME, exist_ok=True) - print("Protect Current Label Data...") - cloak_image_X = diff_protected_data(sess, feature_extractors_ls, cloak_data.protect_train_X, number_protect=num_protect, - target_X=cloak_data.target_data, sybil=False, th=DSSIM_THRESHOLD) + target_X=cloak_data.target_data, th=args.th) cloak_data.cloaked_protect_train_X = cloak_image_X RES['cloak_data'] = cloak_data - save_results(RES, os.path.join(RES_FILE_NAME, 'cloak_data.p')) + pickle.dump(RES, open(os.path.join(RES_FILE_NAME, 'cloak_data.p'), "wb")) def parse_arguments(argv): @@ -124,11 +85,18 @@ def parse_arguments(argv): parser.add_argument('--gpu', type=str, help='GPU id', default='0') parser.add_argument('--dataset', type=str, - help='name of dataset', default='pubfig') - parser.add_argument('--model_idx', type=str, - help='teacher model index', default="3") + help='name of dataset', default='scrub') + parser.add_argument('--feature-extractor', type=str, + help="name of the feature extractor used for optimization", + default="../feature_extractors/webface_dense_robust_extract.h5") + parser.add_argument('--layer-idx', type=int, + help="the idx of the layer of neuron that are used as feature space", + default=-3) + parser.add_argument('--th', type=float, default=0.01) + parser.add_argument('--sd', type=int, default=1e4) parser.add_argument('--protect_class', type=str, default=None) + parser.add_argument('--lr', type=float, default=0.1) return parser.parse_args(argv) diff --git a/fawkes/utils.py b/fawkes/utils.py index c40deca..430f353 100644 --- a/fawkes/utils.py +++ b/fawkes/utils.py @@ -1,3 +1,4 @@ +import json import os import pickle import random @@ -7,11 +8,26 @@ import keras.backend as K import numpy as np import tensorflow as tf from keras.applications.vgg16 import preprocess_input +from keras.layers import Dense, Activation +from keras.models import Model from keras.preprocessing import image from keras.utils import to_categorical from sklearn.metrics import pairwise_distances +def clip_img(X, preprocessing='raw'): + X = reverse_preprocess(X, preprocessing) + X = np.clip(X, 0.0, 255.0) + X = preprocess(X, preprocessing) + return X + + +def dump_dictionary_as_json(dict, outfile): + j = json.dumps(dict) + with open(outfile, "wb") as f: + f.write(j.encode()) + + def fix_gpu_memory(mem_fraction=1): os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction) @@ -25,6 +41,19 @@ def fix_gpu_memory(mem_fraction=1): return sess +def load_victim_model(number_classes, teacher_model=None, end2end=False): + for l in teacher_model.layers: + l.trainable = end2end + x = teacher_model.layers[-1].output + + x = Dense(number_classes)(x) + x = Activation('softmax', name="act")(x) + model = Model(teacher_model.input, x) + opt = keras.optimizers.Adadelta() + model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) + return model + + def init_gpu(gpu_index, force=False): if isinstance(gpu_index, list): gpu_num = ','.join([str(i) for i in gpu_index]) @@ -152,93 +181,43 @@ def imagenet_reverse_preprocessing(x, data_format=None): return x -def imagenet_reverse_preprocessing_cntk(x, data_format=None): - import keras.backend as K - """ Reverse preprocesses a tensor encoding a batch of images. - # Arguments - x: input Numpy tensor, 4D. - data_format: data format of the image tensor. - # Returns - Preprocessed tensor. - """ - x = np.array(x) - if data_format is None: - data_format = K.image_data_format() - assert data_format in ('channels_last', 'channels_first') +def build_bottleneck_model(model, cut_off): + bottleneck_model = Model(model.input, model.get_layer(cut_off).output) + bottleneck_model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + return bottleneck_model - if data_format == 'channels_first': - # Zero-center by mean pixel - x[:, 0, :, :] += 114.0 - x[:, 1, :, :] += 114.0 - x[:, 2, :, :] += 114.0 - # 'BGR'->'RGB' - x = x[:, ::-1, :, :] + +def load_extractor(name, layer_idx=None): + model = keras.models.load_model(name) + + if "extract" in name.split("/")[-1]: + model = keras.models.load_model(name) else: - # Zero-center by mean pixel - x[:, :, :, 0] += 114.0 - x[:, :, :, 1] += 114.0 - x[:, :, :, 2] += 114.0 - # 'BGR'->'RGB' - x = x[:, :, :, ::-1] - return x + print("Convert a model to a feature extractor") + model = build_bottleneck_model(model, model.layers[layer_idx].name) + model.save(name + "extract") + model = keras.models.load_model(name + "extract") - -def load_extractor(name): - model = keras.models.load_model("/home/shansixioing/cloak/models/extractors/{}_extract.h5".format(name)) return model def get_dataset_path(dataset): - if dataset == "webface": - train_data_dir = '/mnt/data/sixiongshan/data/webface/train' - test_data_dir = '/mnt/data/sixiongshan/data/webface/test' - number_classes = 10575 - number_samples = 475137 - - elif dataset == "vggface1": - train_data_dir = '/mnt/data/sixiongshan/data/vggface/train' - test_data_dir = '/mnt/data/sixiongshan/data/vggface/test' - number_classes = 2622 - number_samples = 1716436 // 3 - - elif dataset == "vggface2": - train_data_dir = '/mnt/data/sixiongshan/data/vggface2/train' - test_data_dir = '/mnt/data/sixiongshan/data/vggface2/test' - number_classes = 8631 - number_samples = 3141890 // 3 - - elif dataset == "scrub": - train_data_dir = '/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/train' - test_data_dir = '/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/test' + if dataset == "scrub": + train_data_dir = '../data/scrub/train' + test_data_dir = '../data/scrub/test' number_classes = 530 number_samples = 57838 - - elif dataset == "youtubeface": - train_data_dir = '/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/train_mtcnnpy_224' - test_data_dir = '/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/test_mtcnnpy_224' - number_classes = 1283 - number_samples = 587137 // 5 - - elif dataset == "emily": - train_data_dir = '/mnt/data/sixiongshan/data/emface/train' - test_data_dir = '/mnt/data/sixiongshan/data/emface/test' - number_classes = 66 - number_samples = 6070 - elif dataset == "pubfig": - train_data_dir = '/mnt/data/sixiongshan/data/pubfig/train' - test_data_dir = '/mnt/data/sixiongshan/data/pubfig/test' + train_data_dir = '../data/pubfig/train' + test_data_dir = '../data/pubfig/test' number_classes = 65 number_samples = 5979 - - elif dataset == "iris": - train_data_dir = '/mnt/data/sixiongshan/data/iris/train' - test_data_dir = '/mnt/data/sixiongshan/data/iris/test' - number_classes = 1000 - number_samples = 14000 else: - print("Dataset {} does not exist... Abort".format(dataset)) - exit(1) + raise Exception( + "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format( + dataset)) return train_data_dir, test_data_dir, number_classes, number_samples @@ -261,7 +240,6 @@ class CloakData(object): self.protect_class = random.choice(self.all_labels) self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class]) - print("Protect label: {} | Sybil label: {}".format(self.protect_class, self.sybil_class)) self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class) self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class) @@ -290,11 +268,11 @@ class CloakData(object): def load_embeddings(self, feature_extractors_names): dictionaries = [] - for extractor_name in feature_extractors_names: - path2emb = pickle.load(open("/home/shansixioing/cloak/embs/{}_emb_norm.p".format(extractor_name), "rb")) - # path2emb = pickle.load(open("/home/shansixioing/cloak/embs/vggface2_inception_emb.p".format(extractor_name), "rb")) + extractor_name = extractor_name.split("/")[-1].split('.')[0].replace("_extract", "") + path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb")) dictionaries.append(path2emb) + merge_dict = {} for k in dictionaries[0].keys(): cur_emb = [dic[k] for dic in dictionaries]