Commit 4029cac2 authored by Alexander Fuchs

Merged code with kaggle notebook code

parent 1dc9174e
@@ -14,28 +14,27 @@ import tensorflow as tf
warnings.filterwarnings('ignore')
class Dataset(object):
"""Implements the dataset properties
"""
def __init__(self,path="",is_training_set=True):
def __init__(self,path="",is_training_set=False):
#Paths
self.is_training_set = is_training_set
self.path = path
self.train_audio_path = os.path.join(path,"train_audio")
self.train_csv_path = os.path.join(path,"train.csv")
self.train_dict = self.csv_to_dict(self.train_csv_path)
#Path of "false" audio samples
self.false_audio_path = os.path.join(path,"false_audio")
if self.is_training_set:
self.train_audio_path = os.path.join(path,"train_audio")
self.train_csv_path = os.path.join(path,"train.csv")
self.train_dict = self.csv_to_dict(self.train_csv_path)
#Path of "false" audio samples
self.false_audio_path = os.path.join(path,"false_audio")
#Path to test audio
self.test_audio_path = os.path.join(path,"example_test_audio")
self.test_audio_path = os.path.join(path,"test_audio")
self.test_csv_path = os.path.join(path,"test.csv")
self.test_dict = self.csv_to_dict(self.test_csv_path)
self.test_meta_path = os.path.join(path,"example_test_audio_metadata.csv")
self.test_dict = self.csv_to_dict(self.test_meta_path,self.test_dict)
self.test_summary_path = os.path.join(path,"example_test_audio_summary.csv")
self.test_dict = self.csv_to_dict(self.test_summary_path,self.test_dict)
self.prepare()
def csv_to_dict(self,path,data_dict = None):
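
The body of csv_to_dict is collapsed in this diff. Judging from its call sites above (a CSV path plus an optional data_dict to merge into) and from the row loops that start at index 1, a minimal sketch could look like the following; the csv-module implementation and the header-at-index-0 detail are inferences from the surrounding code, not the author's actual body:

import csv

def csv_to_dict(path, data_dict=None):
    """Hypothetical sketch: read a CSV into {column_name: [values, ...]},
    keeping the header string itself at index 0, which would explain why
    the row loops above and below start at range(1, ...)."""
    if data_dict is None:
        data_dict = {}
    with open(path, newline="") as f:
        reader = csv.reader(f)
        header = next(reader)
        for name in header:
            data_dict.setdefault(name, []).append(name)  # header row kept at index 0
        for row in reader:
            for name, value in zip(header, row):
                data_dict[name].append(value)
    return data_dict
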
@@ -83,6 +82,7 @@ class Dataset(object):
self.train_samples = []
mp3_filenames = glob.glob(self.train_audio_path + "/**/*",
recursive = True)
for i_row in range(1,len(self.train_dict['filename'])):
sample = {}
for key in self.train_dict:
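
This row loop (and its counterpart for the test set below) converts the column-oriented dict back into one dict per sample, starting at index 1 because index 0 holds the header. Stripped of the filename matching and length guards, the pattern reduces to the following; the column names and values are toy stand-ins:

# Toy column dict in the csv_to_dict layout (header kept at index 0).
train_dict = {'filename': ['filename', 'XC1.mp3', 'XC2.mp3'],
              'ebird_code': ['ebird_code', 'amecro', 'bkcchi']}

train_samples = []
for i_row in range(1, len(train_dict['filename'])):
    sample = {key: train_dict[key][i_row] for key in train_dict}
    train_samples.append(sample)

print(train_samples[0])  # {'filename': 'XC1.mp3', 'ebird_code': 'amecro'}
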
@@ -108,51 +108,54 @@ class Dataset(object):
#Prepare test samples
self.test_samples = []
try:
self.bird_dict = np.load(os.path.join(self.path,"bird_dict.npy"),allow_pickle=True).item()
self.bird_dict = np.load(os.path.join("../input/christoph-ds/birddict/bird_dict.npy"),allow_pickle=True).item()
except Exception:
raise RuntimeError("Run first with training set to create bird mapping!")
mp3_filenames = glob.glob(self.test_audio_path + "/**/*",
recursive = True)
for i_row in range(1,len(self.test_dict['filename'])):
for i_row in range(1,len(self.test_dict['site'])):
sample = {}
for key in self.test_dict:
if len(self.test_dict[key])>i_row:
if key == 'filename':
if key == 'audio_id':
search_name = self.test_dict[key][i_row]
for name in mp3_filenames:
if search_name in name:
sample[key] = name
sample["filename"] = name
break
else:
sample[key] = self.test_dict[key][i_row]
else:
sample[key] = None
self.test_samples.append(sample)
self.n_samples = len(self.test_samples)
class DataGenerator(object):
def __init__(self,dataset,augmentation,
shuffle = True,
is_training = True,
shuffle = False, #temp by obch
is_training = False,
is_validation = False,
force_feature_recalc = False,
preload_false_samples = True,
preload_samples = False,
training_percentage = 90,
save_created_features = True,
force_mono = True,
max_time = 5,
max_samples_per_audio = 6,
n_fft = 2048,
hop_length = 512,
sampling_rate = 22050):
self.dataset = dataset
#Shuffle files before loading since dataset is ordered by class
if shuffle:
random.seed(4)
random.shuffle(self.dataset.train_samples)
self.n_training_samples = int(dataset.n_samples*training_percentage/100)
self.n_validation_samples = dataset.n_samples-self.n_training_samples
self.augmentation = augmentation
@@ -167,27 +170,30 @@ class DataGenerator(object):
self.max_samples_per_audio = max_samples_per_audio
self.force_feature_recalc = force_feature_recalc
self.save_created_features = save_created_features
self.force_mono = force_mono
if self.is_training:
self.first_sample = 0
self.last_sample = self.n_training_samples
elif self.is_validation:
self.first_sample = self.n_training_samples
self.last_sample = self.dataset.n_samples
#Get paths of false samples
false_samples_mono = glob.glob(self.dataset.false_audio_path+ "/mono/*.npz",
recursive = True)
false_samples_stereo = glob.glob(self.dataset.false_audio_path+ "/stereo/*.npz",
recursive = True)
self.false_sample_paths = false_samples_mono + false_samples_stereo
#Pre-load false samples
if self.preload_false_samples:
self.preloaded_false_samples = {}
for path in self.false_sample_paths:
with np.load(path,allow_pickle=True) as sample_file:
self.preloaded_false_samples[path] = sample_file.f.arr_0
print("Finished pre-loading false samples!")
if self.is_training or self.is_validation:
#Get paths of false samples
false_samples_mono = glob.glob(self.dataset.false_audio_path+ "/mono/*.npz",
recursive = True)
false_samples_stereo = glob.glob(self.dataset.false_audio_path+ "/stereo/*.npz",
recursive = True)
self.false_sample_paths = false_samples_mono + false_samples_stereo
#Pre-load false samples
if self.preload_false_samples:
self.preloaded_false_samples = {}
for path in self.false_sample_paths:
with np.load(path,allow_pickle=True) as sample_file:
self.preloaded_false_samples[path] = sample_file.f.arr_0
print("Finished pre-loading false samples!")
if self.is_training or self.is_validation:
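
The sample_file.f.arr_0 access above is NumPy's attribute-style shortcut for the default array key of an .npz archive. A self-contained round trip (the file name is made up) shows the pattern:

import numpy as np

clip = np.random.randn(22050).astype(np.float32)
np.savez("false_sample_demo.npz", clip)   # an unnamed array is stored as "arr_0"

# NpzFile is lazily file-backed, so the array must be materialised
# inside the context manager, before the file is closed.
with np.load("false_sample_demo.npz", allow_pickle=True) as sample_file:
    restored = sample_file.f.arr_0        # equivalent to sample_file["arr_0"]

assert np.array_equal(clip, restored)
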
@@ -238,19 +244,53 @@ class DataGenerator(object):
return spectrum
def create_feature(self,sample):
def create_feature_for_time(self,filename,start,end,mono=True):
"""Creates the features by doing a STFT"""
y, sr = librosa.core.load(filename,mono=mono,sr=self.sampling_rate)
if mono:
y = np.expand_dims(y,0)
channels = 1
else:
channels = 2
if start < end:
start = int(self.sampling_rate*start)
end = int(self.sampling_rate*end)
end = min(end,y.shape[-1])
y_sample = y[:,start:end]
#Transform audio
spectrum = self.do_stft(y_sample,channels)
#Pad spectrum
spectrum = self.pad_sample(spectrum,
x_size=np.ceil(self.max_time*self.sampling_rate/self.hop_length))
else:
raise ValueError("start must be before end")
return spectrum
def create_feature(self,sample):
"""Creates the features by doing a STFT"""
filename = sample['filename']
channels_str = sample['channels']
channels = int(channels_str.split(" ")[0])
if channels == 1:
mono = True
#filename = str(sample['audio_id']) + ".mp3"
if "channels" in sample.keys() and not(self.force_mono):
try:
channels_str = sample['channels']
except:
channels_str = "mono"
channels = int(channels_str.split(" ")[0])
if channels == 1:
mono = True
else:
mono = False
else:
mono = False
mono = True
channels = 1
y, sr = librosa.core.load(filename,mono=mono,sr=self.sampling_rate)
y,_ = librosa.effects.trim(y)
if self.is_training or self.is_validation:
y,_ = librosa.effects.trim(y)
if mono:
y = np.expand_dims(y,0)
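
The pad target x_size = ceil(max_time * sampling_rate / hop_length) above fixes every spectrogram to the same number of STFT frames. A standalone check with the constructor defaults (max_time=5, sampling_rate=22050, n_fft=2048, hop_length=512); that do_stft calls librosa.stft with exactly these arguments is an assumption:

import numpy as np
import librosa

max_time, sampling_rate = 5, 22050
n_fft, hop_length = 2048, 512

# ceil(5 * 22050 / 512) = ceil(215.33...) = 216 frames.
target_frames = int(np.ceil(max_time * sampling_rate / hop_length))

# A centered librosa STFT of an exactly 5 s clip matches that width:
# 1 + 110250 // 512 = 216 frames and n_fft // 2 + 1 = 1025 frequency bins.
y = np.zeros(max_time * sampling_rate, dtype=np.float32)
S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
print(target_frames, S.shape)  # 216 (1025, 216)
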
@@ -311,7 +351,7 @@ class DataGenerator(object):
if not(os.path.isfile(filename.replace("mp3","npz"))) or self.force_feature_recalc:
samples.append(sample)
print(str(len(all_samples)-len(samples))+" feature samples already exist")
print(str(len(all_samples)-len(samples))+" feature samples of "+str(len(all_samples))+" already exist")
n = len(samples)
@@ -347,11 +387,42 @@ class DataGenerator(object):
for i, _ in enumerate(pool.imap_unordered(self.create_feature, samples), 1):
sys.stderr.write('\rdone {0:%}'.format(max(0,i/n)))
def generate_sample_for_file_and_time(self):
for sample in self.samples:
filename = sample['filename']
seconds = sample['seconds']
site = sample['site']
if site == "site_3":
spectra = self.create_feature(sample)
for key in spectra.keys():
#If only mono --> duplicate
if spectra[key].shape[0] == 1:
    spectrum = np.tile(spectra[key],[2,1,1])
else:
    spectrum = spectra[key]
#Transpose spectrograms for "channels_last"
spectrum = tf.transpose(spectrum,perm=[1,2,0])
sample = {'input_features':spectrum}
yield self.augmentation(sample,self.is_training)
else:
start = int(float(seconds))-5
end = int(float(seconds))
spectrum = self.create_feature_for_time(filename,start,end,mono=True)
#If only mono --> duplicate
if spectrum.shape[0] == 1:
spectrum = np.tile(spectrum,[2,1,1])
#Transpose spectrograms for "channels_last"
spectrum = tf.transpose(spectrum,perm=[1,2,0])
sample = {'input_features':spectrum}
yield self.augmentation(sample,self.is_training)
def generate_all_samples_from_scratch(self):
stft_len = int(np.ceil(self.max_time*self.sampling_rate/self.hop_length))
for sample in self.samples:
#filename = str(sample['audio_id']) + ".mp3"
filename = sample['filename']
#Create features via STFT if no file exists
spectra = self.create_feature(sample)
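
Both generators above normalise channels the same way: a mono spectrogram is tiled to two channels and then transposed to TensorFlow's channels_last layout. In isolation, with dummy sizes:

import numpy as np
import tensorflow as tf

spectrum = np.random.rand(1, 1025, 216).astype(np.float32)  # (channels, freq, time)

if spectrum.shape[0] == 1:                    # mono --> duplicate the channel
    spectrum = np.tile(spectrum, [2, 1, 1])   # (2, 1025, 216)

spectrum = tf.transpose(spectrum, perm=[1, 2, 0])  # (freq, time, channels)
print(spectrum.shape)  # (1025, 216, 2)
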
@@ -359,6 +430,7 @@ class DataGenerator(object):
for spec_key in spectra.keys():
#Check for None type
spectrum = spectra[spec_key]
if spectrum is None or spectrum.shape[-1] != stft_len:
continue
@@ -369,17 +441,20 @@ class DataGenerator(object):
#Transpose spectrograms for "channels_last"
spectrum = tf.transpose(spectrum,perm=[1,2,0])
#Fill false spectra with zero
false_spectrum = tf.zeros_like(spectrum)
if self.is_training or self.is_validation:
label = tf.one_hot(sample['bird_id'],self.dataset.n_classes+1)
else:
label = None
#Fill false spectra with zero
false_spectrum = tf.zeros_like(spectrum)
if self.is_training or self.is_validation:
label = tf.one_hot(sample['bird_id'],self.dataset.n_classes+1)
else:
label = None
sub_sample = {'input_features':spectrum,
'labels':label,
'false_sample':false_spectrum}
sub_sample = {'input_features':spectrum,
'labels':label,
'false_sample':false_spectrum}
else:
sub_sample = {'input_features':spectrum}
if self.augmentation is not None:
yield self.augmentation(sub_sample,self.is_training)
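
The label here is a plain one-hot vector over n_classes + 1 slots; the extra slot presumably leaves room for a no-bird class alongside the zero-filled false_sample spectra. With a toy class count:

import tensorflow as tf

n_classes = 4                          # toy value, not the real class count
label = tf.one_hot(2, n_classes + 1)   # bird_id = 2
print(label.numpy())                   # [0. 0. 1. 0. 0.] -- one spare slot
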
@@ -393,8 +468,10 @@ class DataGenerator(object):
for sample in self.samples:
filename = sample['filename']
#If feature was already created load from file
if os.path.isfile(filename.replace("mp3","npz")) and not(self.force_feature_recalc):
if self.preload_samples:
spectra_npz = self.preloaded_samples[filename.replace("mp3","npz")]
@@ -444,16 +521,19 @@ class DataGenerator(object):
#Transpose spectrograms for "channels_last"
spectra = tf.transpose(spectra,perm=[1,2,0])
false_spectra = tf.transpose(false_spectra,perm=[1,2,0])
sample = {'input_features':spectra,
'labels':tf.one_hot(sample['bird_id'],self.dataset.n_classes+1),
'false_sample':false_spectra}
if self.is_training:
sample = {'input_features':spectra,
'labels':tf.one_hot(sample['bird_id'],self.dataset.n_classes+1),
'false_sample':false_spectra}
else:
sample = {'input_features':spectra}
if self.augmentation is not None:
yield self.augmentation(sample,self.is_training)
else:
yield sample
if __name__ == "__main__":
ds = Dataset("/srv/TUG/datasets/cornell_birdcall_recognition")
dg = DataGenerator(ds,None,force_feature_recalc=True)
dg.create_all_features_multi_cpu()
ds = Dataset("/srv/TUG/datasets/cornell_birdcall_recognition",is_training_set=True)
dg = DataGenerator(ds,None,force_feature_recalc=True,is_training=True,preload_samples=False)
dg.create_all_features()
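
Downstream, these generators are consumed through tf.data, mirroring the from_generator calls in the ModelTrainer hunk below. A minimal sketch of that wiring, assuming dg was built with is_training=False so the generator yields {'input_features': ...} dicts only (the batch size is arbitrary):

import tensorflow as tf

dataset = tf.data.Dataset.from_generator(
    dg.generate_all_samples_from_scratch,
    output_types={'input_features': tf.float32})
dataset = dataset.batch(16, drop_remainder=False)

for batch in dataset.take(1):
    print(batch['input_features'].shape)  # (batch, freq, time, channels)
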
@@ -273,7 +273,51 @@ class ModelTrainer():
x.append(xy[key])
return x
def predict_sample_by_row_id(self,data_generator,sample):
data_generator.samples = [sample]
dataset = tf.data.Dataset.from_generator(data_generator.generate_sample_for_file_and_time,
output_types = {'input_features':tf.float32})
site = sample["site"]
if site == "site_3":
dataset = dataset.batch(64,drop_remainder=False)
else:
dataset = dataset.batch(1)
all_predictions = []
for xy in dataset:
x = self.get_data_for_keys(xy,self.input_keys)
predictions = self.predict(x,False)
all_predictions.append(predictions["predictions"])
if site == "site_3":
print([pred.shape for pred in all_predictions])
all_predictions = tf.concat(all_predictions,axis=0)
pred = tf.reduce_mean(all_predictions,axis=0,keepdims=True)
else:
pred = all_predictions[0]
return pred
def predict_samples_from_file(self,data_generator,filename,site = "",max_sub_samples=99):
data_generator.samples = [{"filename":filename,"channels":"1 (mono)"}]
dataset = tf.data.Dataset.from_generator(data_generator.generate_all_samples_from_scratch,
output_types = {'input_features':tf.float32})
all_predictions = []
for xy in dataset:
x = self.get_data_for_keys(xy,self.input_keys)
predictions = self.predict(x,False)
all_predictions.append(predictions["predictions"])
if len(all_predictions) == max_sub_samples:
break
if site == "site_3":
all_predictions = tf.concat(all_predictions,axis=0)
all_predictions = [tf.reduce_mean(all_predictions,axis=0,keepdims=True)]
return all_predictions
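
Both site_3 branches above collapse the per-clip predictions into a single file-level prediction by mean pooling. Isolated, with stand-in logits (the output width is arbitrary, not the real class count):

import tensorflow as tf

all_predictions = [tf.random.uniform([1, 265]) for _ in range(7)]  # 7 clips

stacked = tf.concat(all_predictions, axis=0)            # (7, 265)
pred = tf.reduce_mean(stacked, axis=0, keepdims=True)   # (1, 265)
print(pred.shape)  # (1, 265)
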
def predict_dataset(self,data_generator,use_progbar = False,num_batches=1):
if use_progbar:
prog = tf.keras.utils.Progbar(num_batches)