Commit 44b85dff authored by Alexander Fuchs

Implemented a first working example for bird call classification

parent 859462d0
@@ -13,4 +13,8 @@ def basic_dmoe_cnn_mnist():
model.add(DenseMoE(64, n_experts=10, expert_activation='relu', gating_activation='softmax'))
model.add(tf.keras.layers.Dense(10))
return model
\ No newline at end of file
return model
from models.layers.dense_moe import DenseMoE
import tensorflow as tf
class Network(tf.keras.Model):
def __init__(self,
network_block,
n_blocks,
n_layers,
strides,
channel_base,
n_classes,
init_ch,
init_ksize,
init_stride,
use_max_pool = True,
kernel_regularizer = tf.keras.regularizers.l2(2e-4),
kernel_initializer = tf.keras.initializers.he_normal(),
dropout=0.2):
super(Network, self).__init__()
self.network_block = network_block
self.n_blocks = n_blocks
self.n_layers = n_layers
self.strides = strides
self.channel_base = channel_base
self.n_classes = n_classes
self.dropout = dropout
self.init_ch = init_ch
self.init_ksize = init_ksize
self.init_stride = init_stride
self.use_max_pool = use_max_pool
self.kernel_regularizer = kernel_regularizer
self.kernel_initializer = kernel_initializer
def build(self,input_shape):
self.init_conv = tf.keras.layers.Conv2D(self.init_ch,
self.init_ksize,
self.init_stride,
padding = "same",
use_bias = False,
name = 'initial_conv',
kernel_regularizer = self.kernel_regularizer,
kernel_initializer = self.kernel_initializer)
self.init_bn = tf.keras.layers.BatchNormalization(axis=-1)
self.init_relu = tf.keras.layers.Activation("relu")
if self.use_max_pool:
self.init_max_pool = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
strides=(2, 2),
padding="same")
self.network_blocks = []
for i_block in range(self.n_blocks):
self.network_blocks.append(self.network_block(self.n_layers[i_block],
self.channel_base[i_block],
stride = self.strides[i_block],
kernel_regularizer = self.kernel_regularizer,
kernel_initializer = self.kernel_initializer,
dropout = self.dropout)) # forward the configured dropout rate to each block
self.last_bn = tf.keras.layers.BatchNormalization(axis=-1)
self.last_relu = tf.keras.layers.Activation("relu")
self.avg_pool = tf.keras.layers.GlobalAveragePooling2D()
self.dense = tf.keras.layers.Dense(self.n_classes,
name = 'dense_layer',
kernel_regularizer = self.kernel_regularizer,
kernel_initializer = self.kernel_initializer)
def call(self,input,training=False):
"""Returns logits"""
x = self.init_conv(input)
x = self.init_bn(x,training)
x = self.init_relu(x)
if self.use_max_pool:
x = self.init_max_pool(x)
for block in self.network_blocks:
x = block(x,training)
x = self.last_bn(x,training)
x = self.last_relu(x)
x = self.avg_pool(x)
x = self.dense(x)
return x
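A minimal usage sketch for the Network class above (added as models/network.py, judging by the training-script imports further down). The configuration mirrors the ResNet-18 setup in main(); the input shape and class count are placeholders for illustration:

import tensorflow as tf
from models.network import Network
from models.res_block import ResBlockBasicLayer

# Build the ResNet-18-style model the same way main() does below.
model = Network(ResBlockBasicLayer,
                n_blocks = 4,
                n_layers = [2,2,2,2],
                strides = [2,1,2,1],
                channel_base = [64,128,256,512],
                n_classes = 10,   # placeholder; main() uses ds_train.n_classes
                init_ch = 64,
                init_ksize = 7,
                init_stride = 1)

# First call triggers build(); returns logits, per the call() docstring.
dummy = tf.random.normal([2, 224, 224, 3])   # [batch, height, width, channels]
logits = model(dummy, training=False)        # shape [2, 10]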
from models.layers.dense_moe import DenseMoE
import tensorflow as tf
class ResBlockBasicLayer(tf.keras.layers.Layer):
def __init__(self,n_layers,n_channels,
stride=1,
kernel_regularizer = tf.keras.regularizers.l2(2e-4),
kernel_initializer = tf.keras.initializers.he_normal(),
dropout=0.0,
name=''):
super(ResBlockBasicLayer, self).__init__()
self.n_channels = n_channels
self.n_layers = n_layers
self.stride = stride
self.kernel_regularizer = kernel_regularizer
self.kernel_initializer = kernel_initializer
self.dropout = dropout
self.name_op = name
def build(self,input_shape):
self.layers = []
self.shortcut = tf.keras.layers.Conv2D(self.n_channels,
kernel_size = (1,1),
strides = (self.stride, self.stride),
padding = "same",
use_bias = False,
name = self.name_op+'_sc_conv',
kernel_regularizer = self.kernel_regularizer,
kernel_initializer = self.kernel_initializer)
for i in range(self.n_layers):
self.layers.append((tf.keras.layers.BatchNormalization(name=self.name_op+'_bn_'+str(i),axis=-1),1))
self.layers.append((tf.keras.layers.Activation('relu'),0))
if i == 0:
self.layers.append((tf.keras.layers.Conv2D(self.n_channels,
kernel_size=(3, 3),
strides=(self.stride, self.stride),
padding="same",
use_bias=False,
name=self.name_op+'_conv_' + str(i),
kernel_regularizer = self.kernel_regularizer,
kernel_initializer = self.kernel_initializer,
activation=None),0))
else:
self.layers.append((tf.keras.layers.Conv2D(self.n_channels,
kernel_size=(3, 3),
strides=(1, 1),
padding="same",
use_bias=False,
name=self.name_op+'_conv_' + str(i),
kernel_regularizer = self.kernel_regularizer,
kernel_initializer = self.kernel_initializer,
activation=None),0))
if self.dropout > 0.0:
self.layers.append((tf.keras.layers.Dropout(rate=self.dropout),1))
if (i+1) % 2 == 0:
self.layers.append((tf.keras.layers.Add(),2))
def call(self,input,training=False):
x = input
sc = self.shortcut(input)
for layer in self.layers:
if layer[1]==0:
x = layer[0](x)
elif layer[1]==1:
x = layer[0](x,training)
elif layer[1]==2:
x = layer[0]([sc,x])
sc = x
return x
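For orientation: the integer tag stored next to each layer in build() drives the dispatch in call(): 0 calls layer(x), 1 also passes the training flag (BatchNormalization, Dropout), and 2 is the residual Add that merges the shortcut back in, after which the merged tensor becomes the new shortcut. A short sketch of the block in isolation (input shape chosen for illustration):

import tensorflow as tf
from models.res_block import ResBlockBasicLayer

# Two pre-activation 3x3 convs plus a strided 1x1 shortcut, as in build().
block = ResBlockBasicLayer(n_layers = 2, n_channels = 64, stride = 2, name = 'res1')
x = tf.random.normal([2, 56, 56, 64])
y = block(x, training=True)   # stride 2 halves the spatial dims: [2, 28, 28, 64]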
@@ -5,27 +5,27 @@ from absl import app
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from utils.trainer import ModelTrainer
from models import basic_dmoe_cnn
from utils.data_loader import Dataset
from utils.data_loader import DataGenerator
from models.network import Network
from models.res_block import ResBlockBasicLayer
#logging.set_verbosity(logging.WARNING)
HEIGHT = 28
WIDTH = 28
NUM_CHANNELS = 1
BATCH_SIZE = 16
NUM_CLASSES = 10
def preprocess_feature(sample):
def preprocess_input(sample):
"""Preprocess a single image of layout [height, width, depth]."""
return sample
def data_generator(data_generator,batch_size,is_training,shuffle_buffer = 128,is_validation=False,take_n=None,skip_n=None):
def data_generator(data_generator,batch_size,is_training,
shuffle_buffer = 128,
is_validation=False,
take_n=None,
skip_n=None):
dataset = tf.data.Dataset.from_generator(data_generator)
dataset = tf.data.Dataset.from_generator(data_generator,
output_types = {'input_features':tf.float32,
'labels':tf.int32})
if skip_n != None:
dataset = dataset.skip(skip_n)
@@ -35,14 +35,12 @@ def data_generator(data_generator,batch_size,is_training,shuffle_buffer = 128,is
if is_training:
dataset = dataset.shuffle(shuffle_buffer)
dataset = dataset.map(lambda feat, lbl: (preprocess_feature(feat), lbl))
dataset = dataset.map(lambda feat, lbl: (feat, tf.one_hot(lbl,NUM_CLASSES)))
dataset = dataset.batch(BATCH_SIZE,drop_remainder=True)
#dataset = dataset.map(preprocess_input)
dataset = dataset.batch(batch_size,drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
else:
dataset = dataset.map(lambda feat, lbl: (preprocess_feature(feat), lbl))
dataset = dataset.map(lambda feat, lbl: (feat, tf.one_hot(lbl,NUM_CLASSES)))
dataset = dataset.batch(10)
#dataset = dataset.map(preprocess_input)
dataset = dataset.batch(batch_size)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
return dataset
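A sketch of how the training script below feeds bird-call data through this pipeline; the data path and take_n value are placeholders, while the Dataset/DataGenerator calls match those in main():

from utils.data_loader import Dataset
from utils.data_loader import DataGenerator

ds_train = Dataset("/path/to/birdsong-recognition", is_training_set = True)
dg_train = DataGenerator(ds_train, None)
train_ds = data_generator(dg_train.generate, 16, is_training=True, take_n=1000)

# Each element is the dict yielded by DataGenerator.generate, batched:
for batch in train_ds.take(1):
    print(batch['input_features'].shape)   # e.g. (16, 1025, 432, 2)
    print(batch['labels'].shape)           # (16, n_classes), already one-hot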
@@ -63,7 +61,7 @@ FLAGS = flags.FLAGS
flags.DEFINE_string('model_dir', '/tmp', 'save directory name')
flags.DEFINE_string('data_dir', '/tmp', 'data directory name')
flags.DEFINE_integer('epochs', 40, 'number of epochs')
flags.DEFINE_integer('batch_size', 16, 'Mini-batch size')
flags.DEFINE_float('dropout_rate', 0.0, 'dropout rate for the dense blocks')
flags.DEFINE_float('weight_decay', 1e-4, 'weight decay parameter')
flags.DEFINE_float('learning_rate', 1e-3, 'learning rate')
@@ -91,6 +89,7 @@ def main(argv):
test_data_dir = data_dir
train_data_dir = data_dir
epochs = FLAGS.epochs
batch_size = FLAGS.batch_size
dropout_rate = FLAGS.dropout_rate
weight_decay = FLAGS.weight_decay
lr = FLAGS.learning_rate
@@ -99,17 +98,36 @@ def main(argv):
model_save_dir+="_dropout_rate_"+str(dropout_rate)+"_learning_rate_"+str(lr)+"_weight_decay_"+str(weight_decay)
model = basic_dmoe_cnn.basic_dmoe_cnn_mnist()
ds_train = Dataset(data_dir,is_training_set = True)
n_total = ds_train.n_samples
dg_train = DataGenerator(ds_train,None)
n_train = int(n_total*training_percentage/100)
n_val = n_total-n_train
#ResNet 18
model = Network(ResBlockBasicLayer,
n_blocks = 4,
n_layers = [2,2,2,2],
strides = [2,1,2,1],
channel_base = [64,128,256,512],
n_classes = ds_train.n_classes,
init_ch = 64,
init_ksize = 7,
init_stride = 1,
use_max_pool = True,
kernel_regularizer = tf.keras.regularizers.l2(2e-4),
kernel_initializer = tf.keras.initializers.he_normal(),
dropout=dropout_rate)
train_data_gen = data_generator(dg_train,BATCH_SIZE,is_training=True,take_n=n_train)
train_data_gen = data_generator(dg_train.generate,batch_size,
is_training=True,
take_n=n_train)
val_data_gen = data_generator(train_data,100,is_training=False,is_validation = True,skip_n=n_train,take_n=n_val)
val_data_gen = data_generator(dg_train.generate,10,
is_training=False,
is_validation = True,
skip_n=n_train,
take_n=n_val)
trainer = ModelTrainer(model,
@@ -119,11 +137,11 @@ def main(argv):
epochs,
learning_rate_fn = learning_rate_fn,
optimizer = tf.keras.optimizers.Adam,
num_train_batches = int(n_train/BATCH_SIZE),
num_train_batches = int(n_train/batch_size),
base_learning_rate = lr,
load_model = load_model,
save_dir = model_save_dir,
init_data = tf.random.normal([BATCH_SIZE,1025,432,2]),
init_data = tf.random.normal([batch_size,1025,432,2]),
start_epoch = 0)
trainer.train()
@@ -6,9 +6,8 @@ import os
import glob
import warnings
import multiprocessing
import sys
import tensorflow as tf
warnings.filterwarnings('ignore')
class Dataset(object):
@@ -62,7 +61,6 @@ class Dataset(object):
if not(os.path.isfile(os.path.join(self.path,"bird_dict.npy"))):
all_birds = self.train_dict['ebird_code']
self.unique_birds_ebird_codes = np.unique(all_birds)
self.n_classes = len(self.unique_birds_ebird_codes)
self.bird_dict ={}
bird_id = 0
for ebird_code in self.unique_birds_ebird_codes:
@@ -73,7 +71,9 @@ class Dataset(object):
np.save(os.path.join(self.path,"bird_dict.npy"),self.bird_dict)
else:
self.bird_dict = np.load(os.path.join(self.path,"bird_dict.npy"),allow_pickle=True).item()
self.n_classes = len(self.bird_dict.keys())
self.train_samples = []
mp3_filenames = glob.glob(self.train_audio_path + "/**/*",
recursive = True)
@@ -256,14 +256,19 @@ class DataGenerator(object):
filename = sample['filename']
#If the feature was already created, load it from file
if os.path.isfile(filename.replace("mp3","npz")) and not(self.force_feature_recalc):
spectra = np.load(filename.replace("mp3","npz"),allow_pickle=True)
spectra_npz = np.load(filename.replace("mp3","npz"),allow_pickle=True)
spectra = spectra_npz.f.arr_0
else:
#Create features via STFT if no file exists
spectra = self.create_feature(sample)
sample['spectra'] = spectra
#If only mono --> duplicate the channel
if spectra.shape[0] == 1:
spectra = np.tile(spectra,[2,1,1])
#Transpose spectrograms for "channels_last"
spectra = tf.transpose(spectra,perm=[1,2,0])
yield sample
yield {'input_features':spectra,'labels':tf.one_hot(sample['bird_id'],self.dataset.n_classes)}
if __name__ == "__main__":
ds = Dataset("/srv/TUG/datasets/birdsong-recognition")
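The feature cache round trip implied by the loader above, assuming the features are written with np.savez, which is consistent with the spectra_npz.f.arr_0 access; the filename is illustrative:

import numpy as np
import tensorflow as tf

spectra = np.random.rand(1, 1025, 432).astype(np.float32)  # mono: [channels, freq, time]
np.savez("XC12345.npz", spectra)                           # stored under the default key 'arr_0'
loaded = np.load("XC12345.npz", allow_pickle=True).f.arr_0

if loaded.shape[0] == 1:                         # mono --> duplicate the channel
    loaded = np.tile(loaded, [2, 1, 1])
features = tf.transpose(loaded, perm=[1, 2, 0])  # channels_last: (1025, 432, 2)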
@@ -20,6 +20,8 @@ class ModelTrainer():
start_epoch = 0,
num_train_batches = None,
load_model = False,
input_key = "input_features",
label_key = "labels",
gradient_processesing_fn = clip_by_value_10,
save_dir = 'tmp'):
@@ -58,7 +60,9 @@ class ModelTrainer():
self.summary_writer["train"] = tf.summary.create_file_writer(self.train_log_dir)
self.summary_writer["val"] = tf.summary.create_file_writer(self.validation_log_dir)
self.summary_writer["test"] = tf.summary.create_file_writer(self.test_log_dir)
self.input_key = input_key
self.label_key = label_key
#Get number of training batches
if num_train_batches == None:
self.max_number_batches = tf.data.experimental.cardinality(training_data_generator).numpy()
@@ -118,6 +122,7 @@ class ModelTrainer():
def compute_loss(self, x, y , training = True):
logits_classifier = self.model(x,training=training)
# Cross entropy losses
class_loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
@@ -221,7 +226,10 @@
print("Starting epoch " + str(epoch + 1))
batch = 0
for train_x, train_y in self.training_data_generator:
for train_xy in self.training_data_generator:
train_x = train_xy[self.input_key]
train_y = train_xy[self.label_key]
start_time = time()
outputs = self.train_step(train_x, train_y)
class_loss,weight_decay_loss,total_loss,predictions = outputs
@@ -242,7 +250,10 @@
self.write_summaries(epoch, "train")
if self.validation_data_generator != None:
for validation_x, validation_y in self.validation_data_generator:
for validation_xy in self.validation_data_generator:
validation_x = validation_xy[self.input_key]
validation_y = validation_xy[self.label_key]
class_loss, weight_decay_loss,total_loss,predictions = self.compute_loss(validation_x,
validation_y,
training=False)
@@ -255,7 +266,10 @@
if self.test_data_generator != None:
for test_x, test_y in self.test_data_generator:
for test_xy in self.test_data_generator:
test_x = test_xy[self.input_key]
test_y = test_xy[self.label_key]
class_loss, weight_decay_loss, total_loss, predictions = self.compute_loss(test_x,
test_y,
training=False)
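All three loops now share one convention: every batch is a dict, and input_key/label_key select the tensors, so the trainer no longer assumes (x, y) tuples. Condensed into a sketch:

# The batch convention the new input_key/label_key parameters enable.
for batch in dataset:               # each element: {'input_features': ..., 'labels': ...}
    x = batch["input_features"]     # model input, e.g. [batch, 1025, 432, 2]
    y = batch["labels"]             # one-hot labels, [batch, n_classes]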