diff --git a/extract_features.py b/extract_features.py
index f186322a..febbd2fa 100644
--- a/extract_features.py
+++ b/extract_features.py
@@ -7,8 +7,8 @@ import argparse, os, json
 import h5py
 import numpy as np
-from scipy.misc import imread, imresize
-
+# from scipy.misc import imread, imresize
+from cv2 import imread, resize as imresize
 import torch
 import torchvision
@@ -86,8 +86,8 @@ def main(args):
     i0 = 0
     cur_batch = []
     for i, (path, idx) in enumerate(input_paths):
-      img = imread(path, mode='RGB')
-      img = imresize(img, img_size, interp='bicubic')
+      img = imread(path)
+      img = imresize(img, img_size)
       img = img.transpose(2, 0, 1)[None]
       cur_batch.append(img)
       if len(cur_batch) == args.batch_size:
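Note that the cv2 calls above are not exact drop-ins for the scipy.misc ones: cv2.imread returns channels in BGR order (the old imread(path, mode='RGB') returned RGB), cv2.resize takes the target size as (width, height), and its default interpolation is bilinear rather than bicubic. A minimal sketch of a loader that keeps the old behaviour; the helper name load_image is illustrative, not part of the repository:

# Sketch: reproduce the scipy.misc imread/imresize behaviour with OpenCV.
import cv2

def load_image(path, img_size):
    img = cv2.imread(path)                            # H x W x 3, BGR, uint8
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)        # match imread(path, mode='RGB')
    img = cv2.resize(img, (img_size[1], img_size[0]),  # cv2 expects (width, height)
                     interpolation=cv2.INTER_CUBIC)   # match interp='bicubic'
    return img.transpose(2, 0, 1)[None]               # NCHW, as in the original loop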
diff --git a/mac_cell.py b/mac_cell.py
index 2fc78a1f..28af5313 100644
--- a/mac_cell.py
+++ b/mac_cell.py
@@ -27,7 +27,7 @@
 3. The Write Unit integrates the retrieved information to the previous hidden
 memory state, given the value of the control state, to perform the current reasoning operation.
 '''
-class MACCell(tf.nn.rnn_cell.RNNCell):
+class MACCell(tf.compat.v1.nn.rnn_cell.RNNCell):
 
     '''Initialize the MAC cell.
     (Note that in the current version the cell is stateful --
@@ -133,7 +133,7 @@ def output_size(self):
     def control(self, controlInput, inWords, outWords, questionLengths,
         control, contControl = None, name = "", reuse = None):
 
-        with tf.variable_scope("control" + name, reuse = reuse):
+        with tf.compat.v1.variable_scope("control" + name, reuse = reuse):
             dim = config.ctrlDim
 
             ## Step 1: compute "continuous" control state given previous control and question.
@@ -207,14 +207,14 @@ def control(self, controlInput, inWords, outWords, questionLengths,
     [batchSize, memDim]
     '''
     def read(self, knowledgeBase, memory, control, name = "", reuse = None):
-        with tf.variable_scope("read" + name, reuse = reuse):
+        with tf.compat.v1.variable_scope("read" + name, reuse = reuse):
             dim = config.memDim
 
             ## memory dropout
             if config.memoryVariationalDropout:
                 memory = ops.applyVarDpMask(memory, self.memDpMask, self.dropouts["memory"])
             else:
-                memory = tf.nn.dropout(memory, self.dropouts["memory"])
+                memory = tf.compat.v1.nn.dropout(memory, self.dropouts["memory"])
 
             ## Step 1: knowledge base / memory interactions
             # parameters for knowledge base and memory projection
@@ -303,7 +303,7 @@ def read(self, knowledgeBase, memory, control, name = "", reuse = None):
     [batchSize, memDim]
     '''
     def write(self, memory, info, control, contControl = None, name = "", reuse = None):
-        with tf.variable_scope("write" + name, reuse = reuse):
+        with tf.compat.v1.variable_scope("write" + name, reuse = reuse):
 
             # optionally project info
             if config.writeInfoProj:
@@ -374,8 +374,8 @@ def write(self, memory, info, control, contControl = None, name = "", reuse = No
 
         return newMemory
 
-    def memAutoEnc(newMemory, info, control, name = "", reuse = None):
-        with tf.variable_scope("memAutoEnc" + name, reuse = reuse):
+    def memAutoEnc(self, newMemory, info, control, name = "", reuse = None):
+        with tf.compat.v1.variable_scope("memAutoEnc" + name, reuse = reuse):
             # inputs to auto encoder
             features = info if config.autoEncMemInputs == "INFO" else newMemory
             features = ops.linear(features, config.memDim, config.ctrlDim,
@@ -419,7 +419,7 @@ def memAutoEnc(newMemory, info, control, name = "", reuse = None):
     '''
     def __call__(self, inputs, state, scope = None):
         scope = scope or type(self).__name__
-        with tf.variable_scope(scope, reuse = self.reuse): # as tfscope
+        with tf.compat.v1.variable_scope(scope, reuse = self.reuse): # as tfscope
             control = state.control
             memory = state.memory
@@ -460,7 +460,7 @@ def __call__(self, inputs, state, scope = None):
 
             if config.writeDropout < 1.0:
                 # write unit
-                info = tf.nn.dropout(info, self.dropouts["write"])
+                info = tf.compat.v1.nn.dropout(info, self.dropouts["write"])
 
             newMemory = self.write(memory, info, newControl, self.contControl,
                 name = cellName, reuse = cellReuse)
@@ -495,9 +495,9 @@ def __call__(self, inputs, state, scope = None):
     '''
     def initState(self, name, dim, initType, batchSize):
         if initType == "PRM":
-            prm = tf.get_variable(name, shape = (dim, ),
+            prm = tf.compat.v1.get_variable(name, shape = (dim, ),
                 initializer = tf.random_normal_initializer())
-            initState = tf.tile(tf.expand_dims(prm, axis = 0), [batchSize, 1])
+            initState = tf.compat.v1.tile(tf.expand_dims(prm, axis = 0), [batchSize, 1])
         elif initType == "ZERO":
             initState = tf.zeros((batchSize, dim), dtype = tf.float32)
         else: # "Q"
@@ -516,8 +516,8 @@ def initState(self, name, dim, initType, batchSize):
 
     Returns the updated word sequence and lengths.
     '''
-    def addNullWord(words, lengths):
-        nullWord = tf.get_variable("zeroWord", shape = (1 , config.ctrlDim), initializer = tf.random_normal_initializer())
+    def addNullWord(self, words, lengths):
+        nullWord = tf.compat.v1.get_variable("zeroWord", shape = (1 , config.ctrlDim), initializer = tf.random_normal_initializer())
         nullWord = tf.tile(tf.expand_dims(nullWord, axis = 0), [self.batchSize, 1, 1])
         words = tf.concat([nullWord, words], axis = 1)
         lengths += 1
@@ -582,7 +582,7 @@ def zero_state(self, batchSize, dtype = tf.float32):
 
         # if config.controlCoverage:
         #     self.coverage = tf.zeros((batchSize, tf.shape(words)[1]), dtype = tf.float32)
-        #     self.coverageBias = tf.get_variable("coverageBias", shape = (),
+        #     self.coverageBias = tf.compat.v1.get_variable("coverageBias", shape = (),
         #         initializer = config.controlCoverageBias)
 
         ## initialize memory variational dropout mask
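The MAC cell depends on TF1-style variable sharing: every reasoning step calls the same variable_scope, so all iterations reuse one set of weights. A minimal, self-contained sketch of that pattern under the compat.v1 API (the scope and variable names here are illustrative only):

# Sketch: graph-mode variable sharing, the pattern MACCell relies on across iterations.
import tensorflow as tf
tf.compat.v1.disable_eager_execution()

def step(x, reuse):
    with tf.compat.v1.variable_scope("MACCell", reuse=reuse):
        w = tf.compat.v1.get_variable("w", shape=(4, 4))  # created on the first call
        return tf.matmul(x, w)

x = tf.compat.v1.placeholder(tf.float32, shape=(None, 4))
out = step(x, reuse=None)    # creates "MACCell/w"
out = step(out, reuse=True)  # later calls reuse the same variable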
diff --git a/main.py b/main.py
index 198992f2..4ce392b7 100644
--- a/main.py
+++ b/main.py
@@ -23,6 +23,8 @@ from model import MACnet
 
 from collections import defaultdict
 
+tf.compat.v1.disable_eager_execution()
+
 ############################################# loggers #############################################
 
 # Writes log header to file
@@ -151,7 +153,7 @@ def writePreds(preprocessor, evalRes, extraEvalRes):
 ############################################# session #############################################
 # Initializes TF session. Sets GPU memory configuration.
 def setSession():
-    sessionConfig = tf.ConfigProto(allow_soft_placement = True, log_device_placement = False)
+    sessionConfig = tf.compat.v1.ConfigProto(allow_soft_placement = True, log_device_placement = False)
     if config.allowGrowth:
         sessionConfig.gpu_options.allow_growth = True
     if config.maxMemory < 1.0:
@@ -161,17 +163,17 @@ def setSession():
 ############################################## savers #############################################
 # Initializes savers (standard, optional exponential-moving-average and optional for subset of variables)
 def setSavers(model):
-    saver = tf.train.Saver(max_to_keep = config.weightsToKeep)
+    saver = tf.compat.v1.train.Saver(max_to_keep = config.weightsToKeep)
 
     subsetSaver = None
     if config.saveSubset:
         isRelevant = lambda var: any(s in var.name for s in config.varSubset)
-        relevantVars = [var for var in tf.global_variables() if isRelevant(var)]
-        subsetSaver = tf.train.Saver(relevantVars, max_to_keep = config.weightsToKeep, allow_empty = True)
+        relevantVars = [var for var in tf.compat.v1.global_variables() if isRelevant(var)]
+        subsetSaver = tf.compat.v1.train.Saver(relevantVars, max_to_keep = config.weightsToKeep, allow_empty = True)
 
     emaSaver = None
     if config.useEMA:
-        emaSaver = tf.train.Saver(model.emaDict, max_to_keep = config.weightsToKeep)
+        emaSaver = tf.compat.v1.train.Saver(model.emaDict, max_to_keep = config.weightsToKeep)
 
     return {
         "saver": saver,
@@ -657,7 +659,7 @@ def main():
         config.gpusNum = len(config.gpus.split(","))
         os.environ["CUDA_VISIBLE_DEVICES"] = config.gpus
 
-    tf.logging.set_verbosity(tf.logging.ERROR)
+    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
 
     # process data
     print(bold("Preprocess data..."))
@@ -673,7 +675,7 @@ def main():
     print("took {} seconds".format(bcolored("{:.2f}".format(time.time() - start), "blue")))
 
     # initializer
-    init = tf.global_variables_initializer()
+    init = tf.compat.v1.global_variables_initializer()
 
     # savers
     savers = setSavers(model)
@@ -682,7 +684,7 @@ def main():
     # sessionConfig
     sessionConfig = setSession()
 
-    with tf.Session(config = sessionConfig) as sess:
+    with tf.compat.v1.Session(config = sessionConfig) as sess:
 
         # ensure no more ops are added after model is built
         sess.graph.finalize()
@@ -711,7 +713,7 @@ def main():
                 # save weights
                 saver.save(sess, config.weightsFile(epoch))
                 if config.saveSubset:
                     subsetSaver.save(sess, config.subsetWeightsFile(epoch))
 
                 # load EMA weights
                 if config.useEMA:
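Because TF2 executes eagerly by default, tf.compat.v1.disable_eager_execution() has to run before any op or placeholder is created, which is why the patch places it next to the imports. A minimal sketch of the session bootstrap this migration assumes (mirroring setSession and main above, with the GPU options hard-coded for brevity):

# Sketch: TF1-style graph/session bootstrap on top of TF2.
import tensorflow as tf
tf.compat.v1.disable_eager_execution()   # must precede any graph construction

sessionConfig = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                         log_device_placement=False)
sessionConfig.gpu_options.allow_growth = True

with tf.compat.v1.Session(config=sessionConfig) as sess:
    sess.run(tf.compat.v1.global_variables_initializer())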
tf.get_variable("biases", shape = (dim,), + with tf.compat.v1.variable_scope("additiveBiases" + name): + b = tf.compat.v1.get_variable("biases", shape = (dim,), initializer = tf.zeros_initializer()) + bInitial - with tf.variable_scope("multiplicativeBias" + name): - beta = tf.get_variable("biases", shape = (3 * dim,), + with tf.compat.v1.variable_scope("multiplicativeBias" + name): + beta = tf.compat.v1.get_variable("biases", shape = (3 * dim,), initializer = tf.ones_initializer()) Wx, Uh, inter = tf.split(beta * tf.concat([inp1, inp2, inp1 * inp2], axis = 1), @@ -38,7 +38,7 @@ def addBiases(self, inp1, inp2, dim, bInitial = 0, name = ""): def __call__(self, inputs, state, scope = None): scope = scope or type(self).__name__ - with tf.variable_scope(scope, reuse = self.reuse): + with tf.compat.v1.variable_scope(scope, reuse = self.reuse): inputSize = int(inputs.shape[1]) Wxr = self.mulWeights(inputs, inputSize, self.numUnits, name = "Wxr") diff --git a/mi_lstm_cell.py b/mi_lstm_cell.py index 6cfa2f66..3764be78 100644 --- a/mi_lstm_cell.py +++ b/mi_lstm_cell.py @@ -1,7 +1,7 @@ import tensorflow as tf import numpy as np -class MiLSTMCell(tf.nn.rnn_cell.RNNCell): +class MiLSTMCell(tf.compat.v1.nn.rnn_cell.RNNCell): def __init__(self, num_units, forget_bias = 1.0, input_size = None, state_is_tuple = True, activation = tf.tanh, reuse = None): self.numUnits = num_units @@ -11,25 +11,25 @@ def __init__(self, num_units, forget_bias = 1.0, input_size = None, @property def state_size(self): - return tf.nn.rnn_cell.LSTMStateTuple(self.numUnits, self.numUnits) + return tf.compat.v1.nn.rnn_cell.LSTMStateTuple(self.numUnits, self.numUnits) @property def output_size(self): return self.numUnits def mulWeights(self, inp, inDim, outDim, name = ""): - with tf.variable_scope("weights" + name): - W = tf.get_variable("weights", shape = (inDim, outDim), - initializer = tf.contrib.layers.xavier_initializer()) + with tf.compat.v1.variable_scope("weights" + name): + W = tf.compat.v1.get_variable("weights", shape = (inDim, outDim), + initializer = tf.compat.v1.keras.initializers.glorot_normal()) output = tf.matmul(inp, W) return output def addBiases(self, inp1, inp2, dim, name = ""): - with tf.variable_scope("additiveBiases" + name): - b = tf.get_variable("biases", shape = (dim,), + with tf.compat.v1.variable_scope("additiveBiases" + name): + b = tf.compat.v1.get_variable("biases", shape = (dim,), initializer = tf.zeros_initializer()) - with tf.variable_scope("multiplicativeBias" + name): - beta = tf.get_variable("biases", shape = (3 * dim,), + with tf.compat.v1.variable_scope("multiplicativeBias" + name): + beta = tf.compat.v1.get_variable("biases", shape = (3 * dim,), initializer = tf.ones_initializer()) Wx, Uh, inter = tf.split(beta * tf.concat([inp1, inp2, inp1 * inp2], axis = 1), @@ -39,7 +39,7 @@ def addBiases(self, inp1, inp2, dim, name = ""): def __call__(self, inputs, state, scope = None): scope = scope or type(self).__name__ - with tf.variable_scope(scope, reuse = self.reuse): + with tf.compat.v1.variable_scope(scope, reuse = self.reuse): c, h = state inputSize = int(inputs.shape[1]) @@ -68,10 +68,10 @@ def __call__(self, inputs, state, scope = None): self.activation(j)) newH = self.activation(newC) * tf.nn.sigmoid(o) - newState = tf.nn.rnn_cell.LSTMStateTuple(newC, newH) + newState = tf.compat.v1.nn.rnn_cell.LSTMStateTuple(newC, newH) return newH, newState def zero_state(self, batchSize, dtype = tf.float32): - return tf.nn.rnn_cell.LSTMStateTuple(tf.zeros((batchSize, self.numUnits), dtype = dtype), + 
diff --git a/mi_lstm_cell.py b/mi_lstm_cell.py
index 6cfa2f66..3764be78 100644
--- a/mi_lstm_cell.py
+++ b/mi_lstm_cell.py
@@ -1,7 +1,7 @@
 import tensorflow as tf
 import numpy as np
 
-class MiLSTMCell(tf.nn.rnn_cell.RNNCell):
+class MiLSTMCell(tf.compat.v1.nn.rnn_cell.RNNCell):
     def __init__(self, num_units, forget_bias = 1.0, input_size = None,
         state_is_tuple = True, activation = tf.tanh, reuse = None):
         self.numUnits = num_units
@@ -11,25 +11,25 @@ def __init__(self, num_units, forget_bias = 1.0, input_size = None,
 
     @property
     def state_size(self):
-        return tf.nn.rnn_cell.LSTMStateTuple(self.numUnits, self.numUnits)
+        return tf.compat.v1.nn.rnn_cell.LSTMStateTuple(self.numUnits, self.numUnits)
 
     @property
     def output_size(self):
         return self.numUnits
 
     def mulWeights(self, inp, inDim, outDim, name = ""):
-        with tf.variable_scope("weights" + name):
-            W = tf.get_variable("weights", shape = (inDim, outDim),
-                initializer = tf.contrib.layers.xavier_initializer())
+        with tf.compat.v1.variable_scope("weights" + name):
+            W = tf.compat.v1.get_variable("weights", shape = (inDim, outDim),
+                initializer = tf.compat.v1.keras.initializers.glorot_normal())
 
         output = tf.matmul(inp, W)
         return output
 
     def addBiases(self, inp1, inp2, dim, name = ""):
-        with tf.variable_scope("additiveBiases" + name):
-            b = tf.get_variable("biases", shape = (dim,),
+        with tf.compat.v1.variable_scope("additiveBiases" + name):
+            b = tf.compat.v1.get_variable("biases", shape = (dim,),
                 initializer = tf.zeros_initializer())
-        with tf.variable_scope("multiplicativeBias" + name):
-            beta = tf.get_variable("biases", shape = (3 * dim,),
+        with tf.compat.v1.variable_scope("multiplicativeBias" + name):
+            beta = tf.compat.v1.get_variable("biases", shape = (3 * dim,),
                 initializer = tf.ones_initializer())
 
         Wx, Uh, inter = tf.split(beta * tf.concat([inp1, inp2, inp1 * inp2], axis = 1),
@@ -39,7 +39,7 @@ def addBiases(self, inp1, inp2, dim, name = ""):
 
     def __call__(self, inputs, state, scope = None):
         scope = scope or type(self).__name__
-        with tf.variable_scope(scope, reuse = self.reuse):
+        with tf.compat.v1.variable_scope(scope, reuse = self.reuse):
             c, h = state
             inputSize = int(inputs.shape[1])
 
@@ -68,10 +68,10 @@ def __call__(self, inputs, state, scope = None):
                 self.activation(j))
             newH = self.activation(newC) * tf.nn.sigmoid(o)
 
-            newState = tf.nn.rnn_cell.LSTMStateTuple(newC, newH)
+            newState = tf.compat.v1.nn.rnn_cell.LSTMStateTuple(newC, newH)
 
         return newH, newState
 
     def zero_state(self, batchSize, dtype = tf.float32):
-        return tf.nn.rnn_cell.LSTMStateTuple(tf.zeros((batchSize, self.numUnits), dtype = dtype),
+        return tf.compat.v1.nn.rnn_cell.LSTMStateTuple(tf.zeros((batchSize, self.numUnits), dtype = dtype),
             tf.zeros((batchSize, self.numUnits), dtype = dtype))
\ No newline at end of file
diff --git a/model.py b/model.py
index f3114089..4a81229a 100644
--- a/model.py
+++ b/model.py
@@ -56,40 +56,40 @@ def __init__(self, embeddingsInit, answerDict):
     '''
     # change to H x W x C?
     def addPlaceholders(self):
-        with tf.variable_scope("Placeholders"):
+        with tf.compat.v1.variable_scope("Placeholders"):
             ## data
             # questions
-            self.questionsIndicesAll = tf.placeholder(tf.int32, shape = (None, None))
-            self.questionLengthsAll = tf.placeholder(tf.int32, shape = (None, ))
+            self.questionsIndicesAll = tf.compat.v1.placeholder(tf.int32, shape = (None, None))
+            self.questionLengthsAll = tf.compat.v1.placeholder(tf.int32, shape = (None, ))
 
             # images
             # put image known dimension as last dim?
-            self.imagesPlaceholder = tf.placeholder(tf.float32, shape = (None, None, None, None))
+            self.imagesPlaceholder = tf.compat.v1.placeholder(tf.float32, shape = (None, None, None, None))
             self.imagesAll = tf.transpose(self.imagesPlaceholder, (0, 2, 3, 1))
             # self.imageH = tf.shape(self.imagesAll)[1]
             # self.imageW = tf.shape(self.imagesAll)[2]
 
             # answers
-            self.answersIndicesAll = tf.placeholder(tf.int32, shape = (None, ))
+            self.answersIndicesAll = tf.compat.v1.placeholder(tf.int32, shape = (None, ))
 
             ## optimization
-            self.lr = tf.placeholder(tf.float32, shape = ())
-            self.train = tf.placeholder(tf.bool, shape = ())
+            self.lr = tf.compat.v1.placeholder(tf.float32, shape = ())
+            self.train = tf.compat.v1.placeholder(tf.bool, shape = ())
             self.batchSizeAll = tf.shape(self.questionsIndicesAll)[0]
 
             ## dropouts
             # TODO: change dropouts to be 1 - current
             self.dropouts = {
-                "encInput": tf.placeholder(tf.float32, shape = ()),
-                "encState": tf.placeholder(tf.float32, shape = ()),
-                "stem": tf.placeholder(tf.float32, shape = ()),
-                "question": tf.placeholder(tf.float32, shape = ()),
-                # self.dropouts["question"]Out = tf.placeholder(tf.float32, shape = ())
-                # self.dropouts["question"]MAC = tf.placeholder(tf.float32, shape = ())
-                "read": tf.placeholder(tf.float32, shape = ()),
-                "write": tf.placeholder(tf.float32, shape = ()),
-                "memory": tf.placeholder(tf.float32, shape = ()),
-                "output": tf.placeholder(tf.float32, shape = ())
+                "encInput": tf.compat.v1.placeholder(tf.float32, shape = ()),
+                "encState": tf.compat.v1.placeholder(tf.float32, shape = ()),
+                "stem": tf.compat.v1.placeholder(tf.float32, shape = ()),
+                "question": tf.compat.v1.placeholder(tf.float32, shape = ()),
+                # self.dropouts["question"]Out = tf.compat.v1.placeholder(tf.float32, shape = ())
+                # self.dropouts["question"]MAC = tf.compat.v1.placeholder(tf.float32, shape = ())
+                "read": tf.compat.v1.placeholder(tf.float32, shape = ()),
+                "write": tf.compat.v1.placeholder(tf.float32, shape = ()),
+                "memory": tf.compat.v1.placeholder(tf.float32, shape = ()),
+                "output": tf.compat.v1.placeholder(tf.float32, shape = ())
             }
 
             # batch norm params
@@ -103,7 +103,7 @@ def addPlaceholders(self):
             # self.dropouts["read"] = self.dropouts["_read"]
 
             # if config.tempDynamic:
-            #     self.tempAnnealRate = tf.placeholder(tf.float32, shape = ())
+            #     self.tempAnnealRate = tf.compat.v1.placeholder(tf.float32, shape = ())
 
             self.H, self.W, self.imageInDim = config.imageDims
@@ -137,7 +137,7 @@ def createFeedDict(self, data, images, train):
 
     # Splits data to a specific GPU (tower) for parallelization
     def initTowerBatch(self, towerI, towersNum, dataSize):
-        towerBatchSize = tf.floordiv(dataSize, towersNum)
+        towerBatchSize = tf.compat.v1.floordiv(dataSize, towersNum)
         start = towerI * towerBatchSize
         end = (towerI + 1) * towerBatchSize if towerI < towersNum - 1 else dataSize
@@ -164,7 +164,7 @@ def initTowerBatch(self, towerI, towersNum, dataSize):
     '''
     def stem(self, images, inDim, outDim, addLoc = None):
-        with tf.variable_scope("stem"):
+        with tf.compat.v1.variable_scope("stem"):
             if addLoc is None:
                 addLoc = config.locationAware
@@ -206,13 +206,13 @@ def stem(self, images, inDim, outDim, addLoc = None):
     # Embed question using parametrized word embeddings.
     # The embedding are initialized to the values supported to the class initialization
     def qEmbeddingsOp(self, qIndices, embInit):
-        with tf.variable_scope("qEmbeddings"):
+        with tf.compat.v1.variable_scope("qEmbeddings"):
             # if config.useCPU:
             #     with tf.device('/cpu:0'):
             #         embeddingsVar = tf.Variable(self.embeddingsInit, name = "embeddings", dtype = tf.float32)
             # else:
             #     embeddingsVar = tf.Variable(self.embeddingsInit, name = "embeddings", dtype = tf.float32)
-            embeddingsVar = tf.get_variable("emb", initializer = tf.to_float(embInit),
+            embeddingsVar = tf.compat.v1.get_variable("emb", initializer = tf.compat.v1.to_float(embInit),
                 dtype = tf.float32, trainable = (not config.wrdEmbFixed))
             embeddings = tf.concat([tf.zeros((1, config.wrdEmbDim)), embeddingsVar], axis = 0)
             questions = tf.nn.embedding_lookup(embeddings, qIndices)
@@ -221,10 +221,10 @@ def qEmbeddingsOp(self, qIndices, embInit):
 
     # Embed answer words
     def aEmbeddingsOp(self, embInit):
-        with tf.variable_scope("aEmbeddings"):
+        with tf.compat.v1.variable_scope("aEmbeddings"):
             if embInit is None:
                 return None
-            answerEmbeddings = tf.get_variable("emb", initializer = tf.to_float(embInit),
+            answerEmbeddings = tf.compat.v1.get_variable("emb", initializer = tf.compat.v1.to_float(embInit),
                 dtype = tf.float32)
             return answerEmbeddings
@@ -279,7 +279,7 @@ def embeddingsOp(self, qIndices, embInit):
     def encoder(self, questions, questionLengths, projWords = False,
         projQuestion = False, projDim = None):
-        with tf.variable_scope("encoder"):
+        with tf.compat.v1.variable_scope("encoder"):
             # variational dropout option
             varDp = None
             if config.encVariationalDropout:
@@ -294,7 +294,7 @@ def encoder(self, questions, questionLengths, projWords = False,
                     dropout = self.dropouts["encInput"], varDp = varDp, name = "rnn%d" % i)
 
             # dropout for the question vector
-            vecQuestions = tf.nn.dropout(vecQuestions, self.dropouts["question"])
+            vecQuestions = tf.compat.v1.nn.dropout(vecQuestions, self.dropouts["question"])
 
             # projection of encoder outputs
             if projWords:
@@ -325,7 +325,7 @@ def encoder(self, questions, questionLengths, projWords = False,
     Returns the new memory value.
     '''
     def baselineAttLayer(self, images, memory, inDim, hDim, name = "", reuse = None):
-        with tf.variable_scope("attLayer" + name, reuse = reuse):
+        with tf.compat.v1.variable_scope("attLayer" + name, reuse = reuse):
             # projImages = ops.linear(images, inDim, hDim, name = "projImage")
             # projMemory = tf.expand_dims(ops.linear(memory, inDim, hDim, name = "projMemory"), axis = -2)
             # if config.saMultiplicative:
@@ -368,7 +368,7 @@ def baselineAttLayer(self, images, memory, inDim, hDim, name = "", reuse = None)
     [batchSize, outDim] (out dimension depends on baseline method)
     '''
     def baseline(self, vecQuestions, questionDim, images, imageDim, hDim):
-        with tf.variable_scope("baseline"):
+        with tf.compat.v1.variable_scope("baseline"):
             if config.baselineAtt:
                 memory = self.linear(vecQuestions, questionDim, hDim, name = "qProj")
                 images = self.linear(images, imageDim, hDim, name = "iProj")
@@ -428,7 +428,7 @@ def baseline(self, vecQuestions, questionDim, images, imageDim, hDim):
     def MACnetwork(self, images, vecQuestions, questionWords, questionCntxWords,
         questionLengths, name = "", reuse = None):
 
-        with tf.variable_scope("MACnetwork" + name, reuse = reuse):
+        with tf.compat.v1.variable_scope("MACnetwork" + name, reuse = reuse):
 
             self.macCell = MACCell(
                 vecQuestions = vecQuestions,
@@ -453,7 +453,7 @@ def MACnetwork(self, images, vecQuestions, questionWords, questionCntxWords,
             for i in range(config.netLength):
                 self.macCell.iteration = i
                 # if config.unsharedCells:
-                #     with tf.variable_scope("iteration%d" % i):
+                #     with tf.compat.v1.variable_scope("iteration%d" % i):
                 #         macCell.myNameScope = "iteration%d" % i
                 _, state = self.macCell(none, state)
                 # else:
@@ -510,7 +510,7 @@ def MACnetwork(self, images, vecQuestions, questionWords, questionCntxWords,
     Returns the resulted features and their dimension.
     '''
     def outputOp(self, memory, vecQuestions, images, imageInDim):
-        with tf.variable_scope("outputUnit"):
+        with tf.compat.v1.variable_scope("outputUnit"):
             features = memory
             dim = config.memDim
@@ -545,7 +545,7 @@ def outputOp(self, memory, vecQuestions, images, imageInDim):
     [batchSize, answerWordsNum]
     '''
     def classifier(self, features, inDim, aEmbeddings = None):
-        with tf.variable_scope("classifier"):
+        with tf.compat.v1.variable_scope("classifier"):
             outDim = config.answerWordsNum
             dims = [inDim] + config.outClassifierDims + [outDim]
             if config.answerMod != "NON":
@@ -557,10 +557,10 @@ def classifier(self, features, inDim, aEmbeddings = None):
                 dropout = self.dropouts["output"])
 
             if config.answerMod != "NON":
-                logits = tf.nn.dropout(logits, self.dropouts["output"])
+                logits = tf.compat.v1.nn.dropout(logits, self.dropouts["output"])
                 interactions = ops.mul(aEmbeddings, logits, dims[-1], interMod = config.answerMod)
                 logits = ops.inter2logits(interactions, dims[-1], sumMod = "SUM")
-                logits += ops.getBias((outputDim, ), "ans")
+                logits += ops.getBias((outDim, ), "ans")
 
                 # answersWeights = tf.transpose(aEmbeddings)
@@ -576,9 +576,9 @@ def classifier(self, features, inDim, aEmbeddings = None):
         return logits
 
     # def getTemp():
-    #     with tf.variable_scope("temperature"):
+    #     with tf.compat.v1.variable_scope("temperature"):
    #         if config.tempParametric:
-    #             self.temperatureVar = tf.get_variable("temperature", shape = (),
+    #             self.temperatureVar = tf.compat.v1.get_variable("temperature", shape = (),
    #                 initializer = tf.constant_initializer(5), dtype = tf.float32)
    #             temperature = tf.sigmoid(self.temperatureVar)
    #         else:
@@ -591,7 +591,7 @@ def classifier(self, features, inDim, aEmbeddings = None):
     # Computes mean cross entropy loss between logits and answers.
     def addAnswerLossOp(self, logits, answers):
-        with tf.variable_scope("answerLoss"):
+        with tf.compat.v1.variable_scope("answerLoss"):
             losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = answers, logits = logits)
             loss = tf.reduce_mean(losses)
             self.answerLossList.append(loss)
@@ -601,11 +601,11 @@ def addAnswerLossOp(self, logits, answers):
     # Computes predictions (by finding maximal logit value, corresponding to highest probability)
     # and mean accuracy between predictions and answers.
     def addPredOp(self, logits, answers):
-        with tf.variable_scope("pred"):
-            preds = tf.to_int32(tf.argmax(logits, axis = -1)) # tf.nn.softmax(
+        with tf.compat.v1.variable_scope("pred"):
+            preds = tf.compat.v1.to_int32(tf.argmax(logits, axis = -1)) # tf.nn.softmax(
             corrects = tf.equal(preds, answers)
-            correctNum = tf.reduce_sum(tf.to_int32(corrects))
-            acc = tf.reduce_mean(tf.to_float(corrects))
+            correctNum = tf.reduce_sum(tf.compat.v1.to_int32(corrects))
+            acc = tf.reduce_mean(tf.compat.v1.to_float(corrects))
 
             self.correctNumList.append(correctNum)
             self.answerAccList.append(acc)
@@ -613,9 +613,9 @@ def addPredOp(self, logits, answers):
 
     # Creates optimizer (adam)
     def addOptimizerOp(self):
-        with tf.variable_scope("trainAddOptimizer"):
+        with tf.compat.v1.variable_scope("trainAddOptimizer"):
             self.globalStep = tf.Variable(0, dtype = tf.int32, trainable = False, name = "globalStep") # init to 0 every run?
-            optimizer = tf.train.AdamOptimizer(learning_rate = self.lr)
+            optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate = self.lr)
 
         return optimizer
@@ -624,10 +624,10 @@ def addOptimizerOp(self):
     using optimizer.
     '''
     def computeGradients(self, optimizer, loss, trainableVars = None): # tf.trainable_variables()
-        with tf.variable_scope("computeGradients"):
+        with tf.compat.v1.variable_scope("computeGradients"):
             if config.trainSubset:
                 trainableVars = []
-                allVars = tf.trainable_variables()
+                allVars = tf.compat.v1.trainable_variables()
                 for var in allVars:
                     if any((s in var.name) for s in config.varSubset):
                         trainableVars.append(var)
@@ -640,9 +640,9 @@ def computeGradients(self, optimizer, loss, trainableVars = None): # tf.trainabl
     for parameters.
     '''
     def addTrainingOp(self, optimizer, gradients_vars):
-        with tf.variable_scope("train"):
+        with tf.compat.v1.variable_scope("train"):
             gradients, variables = zip(*gradients_vars)
-            norm = tf.global_norm(gradients)
+            norm = tf.compat.v1.global_norm(gradients)
 
             # gradient clipping
             if config.clipGradients:
@@ -650,14 +650,14 @@ def addTrainingOp(self, optimizer, gradients_vars):
                 gradients_vars = zip(clippedGradients, variables)
 
             # updates ops (for batch norm) and train op
-            updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+            updateOps = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
             with tf.control_dependencies(updateOps):
                 train = optimizer.apply_gradients(gradients_vars, global_step = self.globalStep)
 
             # exponential moving average
             if config.useEMA:
                 ema = tf.train.ExponentialMovingAverage(decay = config.emaDecayRate)
-                maintainAveragesOp = ema.apply(tf.trainable_variables())
+                maintainAveragesOp = ema.apply(tf.compat.v1.trainable_variables())
 
                 with tf.control_dependencies([train]):
                     trainAndUpdateOp = tf.group(maintainAveragesOp)
@@ -771,7 +771,7 @@ def build(self):
         self.answerAccList = []
         self.predsList = []
 
-        with tf.variable_scope("macModel"):
+        with tf.compat.v1.variable_scope("macModel"):
             for i in range(config.gpusNum):
                 with tf.device("/gpu:{}".format(i)):
                     with tf.name_scope("tower{}".format(i)) as scope:
@@ -821,7 +821,7 @@ def build(self):
                         self.gradientVarsList.append(gradient_vars)
 
                         # reuse variables in next towers
-                        tf.get_variable_scope().reuse_variables()
+                        tf.compat.v1.get_variable_scope().reuse_variables()
 
         self.averageAcrossTowers(config.gpusNum)
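One detail worth keeping in mind for the dropout calls throughout model.py, mac_cell.py, and ops.py: the dropout placeholders hold keep probabilities, which is what the second positional argument of tf.compat.v1.nn.dropout expects, whereas the TF2-native tf.nn.dropout takes a drop rate. A small sketch of the equivalence, with made-up shapes:

# Sketch: keep_prob (compat.v1) vs. rate (TF2) dropout semantics.
import tensorflow as tf
tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.placeholder(tf.float32, shape=(None, 512))
keep_prob = tf.compat.v1.placeholder(tf.float32, shape=())

y_v1 = tf.compat.v1.nn.dropout(x, keep_prob)      # keeps each unit with probability keep_prob
y_v2 = tf.nn.dropout(x, rate=1.0 - keep_prob)     # same behaviour, TF2-style argument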
diff --git a/ops.py b/ops.py
index 509ff0d2..c2401cdb 100644
--- a/ops.py
+++ b/ops.py
@@ -1,6 +1,7 @@
 from __future__ import division
 import math
 import tensorflow as tf
+import tensorflow_addons as tfa
 
 from mi_gru_cell import MiGRUCell
 from mi_lstm_cell import MiLSTMCell
@@ -16,29 +17,29 @@
 Uses random_normal initialization if 1d, otherwise uses xavier.
 '''
 def getWeight(shape, name = ""):
-    with tf.variable_scope("weights"):
-        initializer = tf.contrib.layers.xavier_initializer()
+    with tf.compat.v1.variable_scope("weights"):
+        initializer = tf.compat.v1.keras.initializers.glorot_normal()
         # if len(shape) == 1: # good?
         #     initializer = tf.random_normal_initializer()
-        W = tf.get_variable("weight" + name, shape = shape, initializer = initializer)
+        W = tf.compat.v1.get_variable("weight" + name, shape = shape, initializer = initializer)
     return W
 
 '''
 Initializes a weight matrix variable given a shape and a name. Uses xavier
 '''
 def getKernel(shape, name = ""):
-    with tf.variable_scope("kernels"):
-        initializer = tf.contrib.layers.xavier_initializer()
-        W = tf.get_variable("kernel" + name, shape = shape, initializer = initializer)
+    with tf.compat.v1.variable_scope("kernels"):
+        initializer = tf.compat.v1.keras.initializers.glorot_normal()
+        W = tf.compat.v1.get_variable("kernel" + name, shape = shape, initializer = initializer)
     return W
 
 '''
 Initializes a bias variable given a shape and a name.
 '''
 def getBias(shape, name = ""):
-    with tf.variable_scope("biases"):
+    with tf.compat.v1.variable_scope("biases"):
         initializer = tf.zeros_initializer()
-        b = tf.get_variable("bias" + name, shape = shape, initializer = initializer)
+        b = tf.compat.v1.get_variable("bias" + name, shape = shape, initializer = initializer)
     return b
 
 ######################################### basics #########################################
@@ -86,7 +87,7 @@ def L2RegularizationOp(l2 = None):
         l2 = config.l2
     l2Loss = 0
     names = ["weight", "kernel"]
-    for var in tf.trainable_variables():
+    for var in tf.compat.v1.trainable_variables():
         if any((name in var.name.lower()) for name in names):
             l2Loss += tf.nn.l2_loss(var)
     return l2 * l2Loss
@@ -112,7 +113,7 @@
 '''
 sumMod = ["LIN", "SUM"]
 def inter2logits(interactions, dim, sumMod = "LIN", dropout = 1.0, name = "", reuse = None):
-    with tf.variable_scope("inter2logits" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("inter2logits" + name, reuse = reuse):
         if sumMod == "SUM":
             logits = tf.reduce_sum(interactions, axis = -1)
         else: # "LIN"
@@ -138,7 +139,7 @@ def inter2logits(interactions, dim, sumMod = "LIN", dropout = 1.0, name = "", re
 [batchSize, N]
 '''
 def inter2att(interactions, dim, dropout = 1.0, name = "", reuse = None):
-    with tf.variable_scope("inter2att" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("inter2att" + name, reuse = reuse):
         logits = inter2logits(interactions, dim, dropout = dropout)
     attention = tf.nn.softmax(logits)
     return attention
@@ -160,8 +161,8 @@ def att2Smry(attention, features):
 '''
 def relu(inp):
     if config.relu == "PRM":
-        with tf.variable_scope(None, default_name = "prelu"):
-            alpha = tf.get_variable("alpha", shape = inp.get_shape()[-1],
+        with tf.compat.v1.variable_scope(None, default_name = "prelu"):
+            alpha = tf.compat.v1.get_variable("alpha", shape = inp.get_shape()[-1],
                 initializer = tf.constant_initializer(0.25))
             pos = tf.nn.relu(inp)
             neg = - (alpha * tf.nn.relu(-inp))
@@ -188,8 +189,8 @@ def relu(inp):
 
 # Sample from Gumbel(0, 1)
 def sampleGumbel(shape):
-    U = tf.random_uniform(shape, minval = 0, maxval = 1)
-    return -tf.log(-tf.log(U + eps) + eps)
+    U = tf.compat.v1.random_uniform(shape, minval = 0, maxval = 1)
+    return -tf.compat.v1.log(-tf.compat.v1.log(U + eps) + eps)
 
 # Draw a sample from the Gumbel-Softmax distribution
 def gumbelSoftmaxSample(logits, temperature):
@@ -229,7 +230,7 @@ def softmaxDiscrete(logits, temperature, train):
         return tf.nn.softmax(logits)
 
 def parametricDropout(name, train):
-    var = tf.get_variable("varDp" + name, shape = (), initializer = tf.constant_initializer(2),
+    var = tf.compat.v1.get_variable("varDp" + name, shape = (), initializer = tf.constant_initializer(2),
         dtype = tf.float32)
     dropout = tf.cond(train, lambda: tf.sigmoid(var), lambda: 1.0)
     return dropout
@@ -251,7 +252,7 @@ def expMask(seq, seqLength):
 '''
 def seq2SeqLoss(logits, targets, lengths):
     mask = tf.sequence_mask(lengths, maxlen = tf.shape(targets)[1])
-    loss = tf.contrib.seq2seq.sequence_loss(logits, targets, tf.to_float(mask))
+    loss = tfa.seq2seq.sequence_loss(logits, targets, tf.compat.v1.to_float(mask))
     return loss
 
 '''
@@ -262,12 +263,12 @@ def seq2SeqLoss(logits, targets, lengths):
 def seq2seqAcc(preds, targets, lengths):
     mask = tf.sequence_mask(lengths, maxlen = tf.shape(targets)[1])
     corrects = tf.logical_and(tf.equal(preds, targets), mask)
-    numCorrects = tf.reduce_sum(tf.to_int32(corrects), axis = 1)
+    numCorrects = tf.reduce_sum(tf.compat.v1.to_int32(corrects), axis = 1)
 
-    acc1 = tf.to_float(numCorrects) / (tf.to_float(lengths) + eps) # add small eps instead?
+    acc1 = tf.compat.v1.to_float(numCorrects) / (tf.compat.v1.to_float(lengths) + eps) # add small eps instead?
     acc1 = tf.reduce_mean(acc1)
 
-    acc2 = tf.to_float(tf.equal(numCorrects, lengths))
+    acc2 = tf.compat.v1.to_float(tf.equal(numCorrects, lengths))
     acc2 = tf.reduce_mean(acc2)
 
     return acc1, acc2
@@ -300,16 +301,16 @@ def linear(inp, inDim, outDim, dropout = 1.0, act = "NON",
     actLayer = True, actDropout = 1.0,
     retVars = False, name = "", reuse = None):
 
-    with tf.variable_scope("linearLayer" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("linearLayer" + name, reuse = reuse):
         W = getWeight((inDim, outDim) if outDim > 1 else (inDim, ))
         b = getBias((outDim, ) if outDim > 1 else ()) + bias
 
         if batchNorm is not None:
-            inp = tf.contrib.layers.batch_norm(inp, decay = batchNorm["decay"],
-                center = True, scale = True, is_training = batchNorm["train"],
-                updates_collections = None)
+            inp = tf.compat.v1.layers.batch_normalization(inp, momentum = batchNorm["decay"],
+                center = True, scale = True, training = batchNorm["train"])
             # tf.layers.batch_normalization, axis -1 ?
 
-        inp = tf.nn.dropout(inp, dropout)
+        inp = tf.compat.v1.nn.dropout(inp, dropout)
 
         if outDim > 1:
             output = multiply(inp, W)
@@ -380,7 +381,7 @@ def FCLayer(features, dims, batchNorm = None, dropout = 1.0, act = "RELU"):
 def cnn(inp, inDim, outDim, batchNorm = None, dropout = 1.0, addBias = True,
     kernelSize = None, stride = 1, act = "NON", name = "", reuse = None):
 
-    with tf.variable_scope("cnnLayer" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("cnnLayer" + name, reuse = reuse):
         if kernelSize is None:
             kernelSize = config.stemKernelSize
@@ -390,12 +391,12 @@ def cnn(inp, inDim, outDim, batchNorm = None, dropout = 1.0, addBias = True,
         b = getBias((outDim, ))
 
         if batchNorm is not None:
-            inp = tf.contrib.layers.batch_norm(inp, decay = batchNorm["decay"], center = batchNorm["center"],
-                scale = batchNorm["scale"], is_training = batchNorm["train"], updates_collections = None)
+            inp = tf.compat.v1.layers.batch_normalization(inp, momentum = batchNorm["decay"], center = batchNorm["center"],
+                scale = batchNorm["scale"], training = batchNorm["train"])
 
-        inp = tf.nn.dropout(inp, dropout)
+        inp = tf.compat.v1.nn.dropout(inp, dropout)
 
-        output = tf.nn.conv2d(inp, filter = kernel, strides = [1, stride, stride, 1], padding = "SAME")
+        output = tf.compat.v1.nn.conv2d(inp, filter = kernel, strides = [1, stride, stride, 1], padding = "SAME")
 
         if addBias:
             output += b
@@ -464,9 +465,9 @@ def locationL(h, w, dim, outDim = -1, addBias = True):
 # dim % 4 = 0
 # h,w can be tensor scalars
 def locationPE(h, w, dim, outDim = -1, addBias = True):
-    x = tf.expand_dims(tf.to_float(tf.linspace(-config.locationBias, config.locationBias, w)), axis = -1)
-    y = tf.expand_dims(tf.to_float(tf.linspace(-config.locationBias, config.locationBias, h)), axis = -1)
-    i = tf.expand_dims(tf.to_float(tf.range(dim)), axis = 0)
+    x = tf.expand_dims(tf.compat.v1.to_float(tf.linspace(-config.locationBias, config.locationBias, w)), axis = -1)
+    y = tf.expand_dims(tf.compat.v1.to_float(tf.linspace(-config.locationBias, config.locationBias, h)), axis = -1)
+    i = tf.expand_dims(tf.compat.v1.to_float(tf.range(dim)), axis = 0)
 
     peSinX = tf.sin(x / (tf.pow(10000.0, i / dim)))
     peCosX = tf.cos(x / (tf.pow(10000.0, i / dim)))
@@ -514,7 +515,7 @@ def locationPE(h, w, dim, outDim = -1, addBias = True):
 def addLocation(features, inDim, lDim, outDim = -1, h = None, w = None,
     locType = "L", mod = "CNCT", name = "", reuse = None): # h,w not needed
 
-    with tf.variable_scope("addLocation" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("addLocation" + name, reuse = reuse):
         batchSize = tf.shape(features)[0]
         if h is None:
             h = tf.shape(features)[1]
@@ -668,15 +669,15 @@ def linearizeFeatures(features, h, w, inDim, projDim = None, outDim = None,
 def mul(x, y, dim, dropout = 1.0, proj = None, interMod = "MUL", concat = None,
     mulBias = None, extendY = True, name = "", reuse = None):
 
-    with tf.variable_scope("mul" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("mul" + name, reuse = reuse):
         origVals = {"x": x, "y": y, "dim": dim}
 
-        x = tf.nn.dropout(x, dropout)
-        y = tf.nn.dropout(y, dropout)
+        x = tf.compat.v1.nn.dropout(x, dropout)
+        y = tf.compat.v1.nn.dropout(y, dropout)
 
         # projection
         if proj is not None:
-            x = tf.nn.dropout(x, proj.get("dropout", 1.0))
-            y = tf.nn.dropout(y, proj.get("dropout", 1.0))
+            x = tf.compat.v1.nn.dropout(x, proj.get("dropout", 1.0))
+            y = tf.compat.v1.nn.dropout(y, proj.get("dropout", 1.0))
 
             if proj["shared"]:
                 xName, xReuse = "proj", None
@@ -753,16 +754,16 @@ def createCell(hDim, reuse, cellType = None, act = None, projDim = None):
     activation = activations.get(act, None)
 
     if cellType == "ProjLSTM":
-        cell = tf.nn.rnn_cell.LSTMCell
+        cell = tf.compat.v1.nn.rnn_cell.LSTMCell
         if projDim is None:
             projDim = config.cellDim
         cell = cell(hDim, num_proj = projDim, reuse = reuse, activation = activation)
         return cell
 
     cells = {
-        "RNN": tf.nn.rnn_cell.BasicRNNCell,
-        "GRU": tf.nn.rnn_cell.GRUCell,
-        "LSTM": tf.nn.rnn_cell.BasicLSTMCell,
+        "RNN": tf.compat.v1.nn.rnn_cell.BasicRNNCell,
+        "GRU": tf.compat.v1.nn.rnn_cell.GRUCell,
+        "LSTM": tf.compat.v1.nn.rnn_cell.BasicLSTMCell,
         "MiGRU": MiGRUCell,
         "MiLSTM": MiLSTMCell
     }
@@ -798,27 +799,27 @@
 def fwRNNLayer(inSeq, seqL, hDim, cellType = None, dropout = 1.0, varDp = None,
     name = "", reuse = None): # proj = None
 
-    with tf.variable_scope("rnnLayer" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("rnnLayer" + name, reuse = reuse):
         batchSize = tf.shape(inSeq)[0]
 
         cell = createCell(hDim, reuse, cellType) # passing reuse isn't mandatory
 
         if varDp is not None:
-            cell = tf.contrib.rnn.DropoutWrapper(cell,
+            cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(cell,
                 state_keep_prob = varDp["stateDp"],
                 input_keep_prob = varDp["inputDp"],
                 variational_recurrent = True, input_size = varDp["inputSize"], dtype = tf.float32)
         else:
-            inSeq = tf.nn.dropout(inSeq, dropout)
+            inSeq = tf.compat.v1.nn.dropout(inSeq, dropout)
 
         initialState = cell.zero_state(batchSize, tf.float32)
 
-        outSeq, lastState = tf.nn.dynamic_rnn(cell, inSeq,
+        outSeq, lastState = tf.compat.v1.nn.dynamic_rnn(cell, inSeq,
             sequence_length = seqL,
             initial_state = initialState,
             swap_memory = True)
 
-        if isinstance(lastState, tf.nn.rnn_cell.LSTMStateTuple):
+        if isinstance(lastState, tf.compat.v1.nn.rnn_cell.LSTMStateTuple):
             lastState = lastState.h
 
         # if proj is not None:
@@ -859,38 +860,38 @@
 def biRNNLayer(inSeq, seqL, hDim, cellType = None, dropout = 1.0, varDp = None,
     name = "", reuse = None): # proj = None,
 
-    with tf.variable_scope("birnnLayer" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("birnnLayer" + name, reuse = reuse):
         batchSize = tf.shape(inSeq)[0]
 
-        with tf.variable_scope("fw"):
+        with tf.compat.v1.variable_scope("fw"):
             cellFw = createCell(hDim, reuse, cellType)
-        with tf.variable_scope("bw"):
+        with tf.compat.v1.variable_scope("bw"):
             cellBw = createCell(hDim, reuse, cellType)
 
         if varDp is not None:
-            cellFw = tf.contrib.rnn.DropoutWrapper(cellFw,
+            cellFw = tf.compat.v1.nn.rnn_cell.DropoutWrapper(cellFw,
                 state_keep_prob = varDp["stateDp"],
                 input_keep_prob = varDp["inputDp"],
                 variational_recurrent = True, input_size = varDp["inputSize"], dtype = tf.float32)
 
-            cellBw = tf.contrib.rnn.DropoutWrapper(cellBw,
+            cellBw = tf.compat.v1.nn.rnn_cell.DropoutWrapper(cellBw,
                 state_keep_prob = varDp["stateDp"],
                 input_keep_prob = varDp["inputDp"],
                 variational_recurrent = True, input_size = varDp["inputSize"], dtype = tf.float32)
         else:
-            inSeq = tf.nn.dropout(inSeq, dropout)
+            inSeq = tf.compat.v1.nn.dropout(inSeq, dropout)
 
         initialStateFw = cellFw.zero_state(batchSize, tf.float32)
         initialStateBw = cellBw.zero_state(batchSize, tf.float32)
 
-        (outSeqFw, outSeqBw), (lastStateFw, lastStateBw) = tf.nn.bidirectional_dynamic_rnn(
+        (outSeqFw, outSeqBw), (lastStateFw, lastStateBw) = tf.compat.v1.nn.bidirectional_dynamic_rnn(
             cellFw, cellBw, inSeq,
             sequence_length = seqL,
             initial_state_fw = initialStateFw,
            initial_state_bw = initialStateBw,
            swap_memory = True)
 
-        if isinstance(lastStateFw, tf.nn.rnn_cell.LSTMStateTuple):
+        if isinstance(lastStateFw, tf.compat.v1.nn.rnn_cell.LSTMStateTuple):
             lastStateFw = lastStateFw.h # take c?
             lastStateBw = lastStateBw.h
@@ -940,7 +941,7 @@ def biRNNLayer(inSeq, seqL, hDim, cellType = None, dropout = 1.0, varDp = None,
 def RNNLayer(inSeq, seqL, hDim, bi = None, cellType = None, dropout = 1.0, varDp = None,
     name = "", reuse = None): # proj = None
 
-    with tf.variable_scope("rnnLayer" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("rnnLayer" + name, reuse = reuse):
         if bi is None:
             bi = config.encBi
@@ -954,7 +955,7 @@ def RNNLayer(inSeq, seqL, hDim, bi = None, cellType = None, dropout = 1.0, varDp
 # tf counterpart?
 # hDim = config.moduleDim
 def multigridRNNLayer(featrues, h, w, dim, name = "", reuse = None):
-    with tf.variable_scope("multigridRNNLayer" + name, reuse = reuse):
+    with tf.compat.v1.variable_scope("multigridRNNLayer" + name, reuse = reuse):
         featrues = linear(featrues, dim, dim / 2, name = "i")
 
         output0 = gridRNNLayer(featrues, h, w, dim, right = True, down = True, name = "rd")
@@ -965,11 +966,11 @@ def multigridRNNLayer(featrues, h, w, dim, name = "", reuse = None):
         output = tf.concat([output0, output1, output2, output3], axis = -1)
         output = linear(output, 2 * dim, dim, name = "o")
 
-    return outputs
+    return output
 
 # h,w should be constants
 def gridRNNLayer(features, h, w, dim, right, down, name = "", reuse = None):
-    with tf.variable_scope("gridRNNLayer" + name):
+    with tf.compat.v1.variable_scope("gridRNNLayer" + name):
         batchSize = tf.shape(features)[0]
 
         cell = createCell(dim, reuse = reuse, cellType = config.stemGridRnnMod,
@@ -1001,7 +1002,7 @@ def gridRNNLayer(features, h, w, dim, right, down, name = "", reuse = None):
 # tf seq2seq?
 # def projRNNLayer(inSeq, seqL, hDim, labels, labelsNum, labelsDim, labelsEmb, name = "", reuse = None):
-#     with tf.variable_scope("projRNNLayer" + name):
+#     with tf.compat.v1.variable_scope("projRNNLayer" + name):
 #         batchSize = tf.shape(features)[0]
 
 #         cell = createCell(hDim, reuse = reuse)
@@ -1034,7 +1035,7 @@ def gridRNNLayer(features, h, w, dim, right, down, name = "", reuse = None):
 #             chosenOut = tf.stack(chosenList, axis = 1)
 #             outputs = (logitsOut, chosenOut)
 #         else:
-#             labels = tf.to_float(labels)
+#             labels = tf.compat.v1.to_float(labels)
 #             labels = tf.concat([tf.zeros((batchSize, 1)), labels], axis = 1)[:, :-1] # ,newaxis
 #             inSeq = tf.concat([inSeq, tf.expand_dims(labels, axis = -1)], axis = -1)
@@ -1052,10 +1053,10 @@
 probability value.
 '''
 def generateVarDpMask(shape, keepProb):
-    randomTensor = tf.to_float(keepProb)
-    randomTensor += tf.random_uniform(shape, minval = 0, maxval = 1)
+    randomTensor = tf.compat.v1.to_float(keepProb)
+    randomTensor += tf.compat.v1.random_uniform(shape, minval = 0, maxval = 1)
     binaryTensor = tf.floor(randomTensor)
-    mask = tf.to_float(binaryTensor)
+    mask = tf.compat.v1.to_float(binaryTensor)
     return mask
 
 '''
@@ -1063,5 +1064,5 @@
 and a dropout probability value.
 '''
 def applyVarDpMask(inp, mask, keepProb):
-    ret = (tf.div(inp, tf.to_float(keepProb))) * mask
+    ret = (tf.compat.v1.div(inp, tf.compat.v1.to_float(keepProb))) * mask
     return ret
diff --git a/requirements.txt b/requirements.txt
index ae500bf8..5cf399f4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,7 @@ scipy
 torchvision
 h5py
 tensorflow
+tensorflow-addons
 tqdm
 termcolor
 matplotlib
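The tensorflow-addons dependency is added because tf.contrib.seq2seq.sequence_loss moved to tfa.seq2seq.sequence_loss, which seq2SeqLoss in ops.py now calls. A minimal usage sketch with made-up shapes, independent of the repository's graph:

# Sketch: sequence loss via tensorflow-addons, replacing tf.contrib.seq2seq.sequence_loss.
import tensorflow as tf
import tensorflow_addons as tfa

logits = tf.random.normal((2, 5, 10))                            # [batch, time, vocab]
targets = tf.random.uniform((2, 5), maxval=10, dtype=tf.int32)   # [batch, time]
lengths = tf.constant([5, 3])
mask = tf.cast(tf.sequence_mask(lengths, maxlen=5), tf.float32)  # per-step weights

loss = tfa.seq2seq.sequence_loss(logits, targets, mask)          # scalar, averaged over batch and time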