Commit a60d2e96 authored by Alexander Fuchs's avatar Alexander Fuchs
Browse files

Changed dataset handling

parent 4029cac2
from utils import trainer
from utils import data_loader
from utils import evaluation
from utils import summary_utils
......@@ -36,7 +36,7 @@ class Discriminator(tf.keras.Model):
kernel_initializer = self.kernel_initializer,
use_bias = False,
name = self.model_name +"_dense",
activation = tf.nn.relu))
activation = None))
def call(self,input,training=False):
......
......@@ -46,7 +46,7 @@ class EvalFunctions(object):
axis=-1,
))
gen_loss = tf.reduce_mean((fake_features - x[1]) ** 2)
gen_loss = 0#tf.reduce_mean((fake_features - x[1]) ** 2)
# Wasserstein losses
gen_loss += tf.reduce_mean(false)
discr_loss = tf.reduce_mean(true) - tf.reduce_mean(false)
......
......@@ -66,4 +66,4 @@ class Generator(tf.keras.Model):
elif layer[1]==1:
x = layer[0](x,training)
x = x[:,:input.shape[1],:input.shape[2],:]
return x+input
return x#+input
......@@ -18,7 +18,7 @@ import tensorflow as tf # pylint: disable=g-bad-import-order
BINS = 1025
N_FRAMES = 216
N_CHANNELS = 2
N_CHANNELS = 1
def augment_input(sample,n_classes,training):
......@@ -224,7 +224,6 @@ def main(argv):
save_dir = model_save_dir,
input_keys = ["input_features","false_sample"],
label_keys = ["labels"],
init_data = tf.random.normal([batch_size,BINS,N_FRAMES,N_CHANNELS]),
start_epoch = 0)
trainer.train()
......
......@@ -136,7 +136,7 @@ def main(argv):
init_ch = 64,
init_ksize = 7,
init_stride = 2,
use_max_pool = True,
use_max_pool = True,
kernel_regularizer = tf.keras.regularizers.l2(2e-4),
kernel_initializer = tf.keras.initializers.he_normal(),
name = "classifier",
......
......@@ -144,6 +144,7 @@ class DataGenerator(object):
preload_samples = False,
training_percentage = 90,
save_created_features = True,
replicate_to_stereo = False,
force_mono = True,
max_time = 5,
max_samples_per_audio = 6,
......@@ -167,6 +168,7 @@ class DataGenerator(object):
self.preload_false_samples = preload_false_samples
self.hop_length = hop_length
self.max_time = max_time
self.replicate_to_stereo = replicate_to_stereo
self.max_samples_per_audio = max_samples_per_audio
self.force_feature_recalc = force_feature_recalc
self.save_created_features = save_created_features
......@@ -274,7 +276,6 @@ class DataGenerator(object):
"""Creates the features by doing a STFT"""
filename = sample['filename']
#filename = str(sample['audio_id']) + ".mp3"
if "channels" in sample.keys() and not(self.force_mono):
try:
channels_str = sample['channels']
......@@ -356,6 +357,7 @@ class DataGenerator(object):
n = len(samples)
pool = multiprocessing.Pool(os.cpu_count())
print(n)
for i, _ in enumerate(pool.imap_unordered(self.create_feature, samples), 1):
sys.stderr.write('\rdone {0:%}'.format(max(0,i/n)))
......@@ -385,6 +387,7 @@ class DataGenerator(object):
pool = multiprocessing.Pool(os.cpu_count())
for i, _ in enumerate(pool.imap_unordered(self.create_feature, samples), 1):
print(i)
sys.stderr.write('\rdone {0:%}'.format(max(0,i/n)))
def generate_sample_for_file_and_time(self):
......@@ -396,9 +399,10 @@ class DataGenerator(object):
if site == "site_3":
spectra = self.create_feature(sample)
for key in spectra.keys():
#If only mono --> duplicate
if spectra[key].shape[0] == 1:
spectrum = np.tile(spectra[key],[2,1,1])
if self.replicate_to_stereo:
#If only mono --> duplicate
if spectra[key].shape[0] == 1:
spectrum = np.tile(spectra[key],[2,1,1])
#Transpose spectrogramms for "channels_last"
spectrum = tf.transpose(spectrum,perm=[1,2,0])
......@@ -408,9 +412,10 @@ class DataGenerator(object):
start = int(float(seconds))-5
end = int(float(seconds))
spectrum = self.create_feature_for_time(filename,start,end,mono=True)
#If only mono --> duplicate
if spectrum.shape[0] == 1:
spectrum = np.tile(spectrum,[2,1,1])
if self.replicate_to_stereo:
#If only mono --> duplicate
if spectrum.shape[0] == 1:
spectrum = np.tile(spectrum,[2,1,1])
#Transpose spectrogramms for "channels_last"
spectrum = tf.transpose(spectrum,perm=[1,2,0])
......@@ -433,10 +438,11 @@ class DataGenerator(object):
if np.any(spectrum) == None or spectrum.shape[-1] != stft_len:
continue
#If only mono --> duplicate
if spectrum.shape[0] == 1:
spectrum = np.tile(spectra[spec_key],[2,1,1])
if self.replicate_to_stereo:
#If only mono --> duplicate
if spectrum.shape[0] == 1:
spectrum = np.tile(spectra[spec_key],[2,1,1])
#Transpose spectrogramms for "channels_last"
spectrum = tf.transpose(spectrum,perm=[1,2,0])
......@@ -509,20 +515,22 @@ class DataGenerator(object):
false_rnd_key = false_spec_keys[np.random.randint(0,len(false_spec_keys))]
false_spectra = false_spectra_npz.item()[false_rnd_key]
#If only mono --> duplicate
if spectra.shape[0] == 1:
spectra = np.tile(spectra,[2,1,1])
#If false only mono --> duplicate
if false_spectra.shape[0] == 1:
false_spectra = np.tile(false_spectra,[2,1,1])
if self.replicate_to_stereo:
#If only mono --> duplicate
if spectra.shape[0] == 1:
spectra = np.tile(spectra,[2,1,1])
#If false only mono --> duplicate
if false_spectra.shape[0] == 1:
false_spectra = np.tile(false_spectra,[2,1,1])
#Transpose spectrogramms for "channels_last"
spectra = tf.transpose(spectra,perm=[1,2,0])
false_spectra = tf.transpose(false_spectra,perm=[1,2,0])
if self.is_training:
if self.is_training or self.is_validation:
sample = {'input_features':spectra,
'labels':tf.one_hot(sample['bird_id'],self.dataset.n_classes+1),
'false_sample':false_spectra}
......@@ -535,5 +543,9 @@ class DataGenerator(object):
if __name__ == "__main__":
ds = Dataset("/srv/TUG/datasets/cornell_birdcall_recognition",is_training_set=True)
dg = DataGenerator(ds,None,force_feature_recalc=True,is_training=True,preload_samples=False)
dg.create_all_features()
dg = DataGenerator(ds,None,
force_feature_recalc=True,
is_training=True,
preload_samples=False,
preload_false_samples=False)
dg.create_false_features_multi_cpu()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment