Commit bfc84030 authored by Alexander Fuchs's avatar Alexander Fuchs
Browse files

Added multi cpu support for feature extraction, implemented bird mapping

parent 45b3e48d
from __future__ import division
import numpy as np
import librosa
import csv
import os
import glob
import warnings
import sox
import multiprocessing
import sys
warnings.filterwarnings('ignore')
class Dataset(object):
"""Implements the dataset properties
"""
def __init__(self, path="", is_training_set=True):
    """Set up dataset paths and load the training CSV.

    Parameters
    ----------
    path : str
        Root directory of the dataset (contains ``train_audio`` and
        ``train.csv``).
    is_training_set : bool
        If True, ``prepare()`` builds training samples and the bird
        name->id mapping; otherwise it builds test samples.
    """
    # NOTE(review): the diff left a stale duplicate
    # `def __init__(self, path=""):` line here (a def with no body is a
    # syntax error); only the new signature is kept.
    # Paths
    self.is_training_set = is_training_set
    self.path = path
    self.train_audio_path = os.path.join(path, "train_audio")
    self.train_csv_path = os.path.join(path, "train.csv")
    # csv_to_dict is defined elsewhere in this class (outside this view).
    self.train_dict = self.csv_to_dict(self.train_csv_path)
......@@ -50,48 +55,76 @@ class Dataset(object):
def prepare(self):
    """Prepare the Dataset instance for use.

    Training mode (``self.is_training_set``): builds/loads the
    ebird_code -> integer-id mapping (cached as ``bird_dict.npy`` in
    ``self.path``), then builds ``self.train_samples``: one dict per CSV
    row, with the ``filename`` column resolved to a full audio path and
    the ``ebird_code`` column replaced by an integer ``bird_id``.

    Test mode: loads the previously created mapping (raises RuntimeError
    if it does not exist yet) and builds ``self.test_samples`` the same
    way, keeping all CSV columns verbatim.

    Sets ``self.n_samples`` to the number of samples built.
    """
    bird_dict_path = os.path.join(self.path, "bird_dict.npy")
    if self.is_training_set:
        # Prepare train samples.
        # Create bird mapping name->int (built once, then cached on disk).
        if not os.path.isfile(bird_dict_path):
            # NOTE(review): row 0 of the CSV columns appears to be the
            # header (loops below start at i_row=1), so the header string
            # also ends up in the mapping — preserved from the original.
            all_birds = self.train_dict['ebird_code']
            self.unique_birds_ebird_codes = np.unique(all_birds)
            # NOTE(review): n_classes/unique_birds_ebird_codes are only
            # set on this first-build path, not when loading the cache —
            # preserved from the original; confirm callers tolerate this.
            self.n_classes = len(self.unique_birds_ebird_codes)
            self.bird_dict = {code: bird_id
                              for bird_id, code
                              in enumerate(self.unique_birds_ebird_codes)}
            np.save(bird_dict_path, self.bird_dict)
        else:
            self.bird_dict = np.load(bird_dict_path, allow_pickle=True).item()
        self.train_samples = []
        mp3_filenames = glob.glob(self.train_audio_path + "/**/*",
                                  recursive=True)
        for i_row in range(1, len(self.train_dict['filename'])):
            sample = {}
            for key in self.train_dict:
                if len(self.train_dict[key]) > i_row:
                    if key == 'filename':
                        # Resolve the bare filename to its full path by
                        # substring search over the recursive glob.
                        search_name = self.train_dict[key][i_row]
                        for name in mp3_filenames:
                            if search_name in name:
                                sample[key] = name
                                break
                    elif key == 'ebird_code':
                        sample['bird_id'] = self.bird_dict[self.train_dict[key][i_row]]
                    else:
                        sample[key] = self.train_dict[key][i_row]
                else:
                    sample[key] = None
            self.train_samples.append(sample)
        self.n_samples = len(self.train_samples)
    else:
        # Prepare test samples.
        self.test_samples = []
        try:
            self.bird_dict = np.load(bird_dict_path, allow_pickle=True).item()
        except OSError as err:
            # Original did `raise("...")` (raising a str is a TypeError)
            # inside a bare except; raise a real exception instead.
            raise RuntimeError(
                "Run first with training set to create bird mapping!") from err
        # NOTE(review): test_audio_path / test_dict are assumed to be set
        # by __init__ code outside this view — confirm.
        mp3_filenames = glob.glob(self.test_audio_path + "/**/*",
                                  recursive=True)
        for i_row in range(1, len(self.test_dict['filename'])):
            sample = {}
            for key in self.test_dict:
                if len(self.test_dict[key]) > i_row:
                    if key == 'filename':
                        search_name = self.test_dict[key][i_row]
                        for name in mp3_filenames:
                            if search_name in name:
                                sample[key] = name
                                break
                    else:
                        sample[key] = self.test_dict[key][i_row]
                else:
                    sample[key] = None
            self.test_samples.append(sample)
        self.n_samples = len(self.test_samples)
class DataGenerator(object):
def __init__(self,dataset,augmentation,
......@@ -123,12 +156,13 @@ class DataGenerator(object):
# NOTE(review): fragment of DataGenerator.create_feature reconstructed from a
# diff view; indentation was stripped and the "......@@" lines below are diff
# hunk markers, not code — parts of the method are missing between them.
# Ensure a leading channel axis: y becomes (1, n_samples) for mono input.
y = np.expand_dims(y,0)
# Clip length in seconds (samples / sampling rate).
duration = y.shape[-1]/self.sampling_rate
#If the sample is longer than two times the maximum audio length, the segments are split
if duration > 2*self.max_time:
# Take max_time/2 seconds from the start and max_time/2 from the end.
y_begin = y[:,:int(self.sampling_rate*self.max_time/2)]
y_end = y[:,-int(self.sampling_rate*self.max_time/2):]
spectra_begin = []
#STFT for all channels
for channel in range(channels):
# Magnitude spectrogram of the beginning segment (call continues past
# the missing hunk below).
spectrum = np.abs(librosa.core.stft(y_begin[channel,:],
n_fft = 2048,
......@@ -140,6 +174,7 @@ class DataGenerator(object):
# Stack per-channel spectra: (channels, freq_bins, frames).
spectra_begin = np.stack(spectra_begin,axis=0)
spectra_end = []
#STFT for all channels
for channel in range(channels):
spectrum = np.abs(librosa.core.stft(y_end[channel,:],
n_fft = 2048,
......@@ -152,6 +187,7 @@ class DataGenerator(object):
# Concatenate begin/end spectrograms along the time (frame) axis.
spectra = np.concatenate([spectra_begin,spectra_end],axis=-1)
else:
#If sample is > max_time and < 2*max time take the beginning of the file
if duration >= self.max_time:
y = y[:,:int(self.sampling_rate*self.max_time)]
else:
......@@ -161,6 +197,7 @@ class DataGenerator(object):
# presumably y_tmp is y padded to max_time in the missing hunk — confirm.
y = y_tmp
spectra = []
#STFT for all channels
for channel in range(channels):
spectrum = np.abs(librosa.core.stft(y[channel,:],
n_fft = 2048,
......@@ -171,33 +208,59 @@ class DataGenerator(object):
spectra.append(spectrum)
spectra = np.stack(spectra,axis=0)
# Pad one extra all-zero frame at the end of the time axis.
spectra = np.concatenate([spectra,np.zeros((channels,spectra.shape[1],1))],axis=-1)
# Cache the features next to the audio file as .npz
# (NOTE(review): str.replace swaps every "mp3" occurrence in the path).
np.savez(filename.replace("mp3","npz"),spectra)
return spectra
def create_all_features(self):
    """Compute and cache STFT features for every sample, single-process.

    Iterates the train or test sample list (per ``self.is_training``),
    calls ``self.create_feature`` on each (which also writes the .npz
    cache), and prints running progress plus the cumulative feature size
    in MB.
    """
    # NOTE(review): the diff left a stale, body-less `def generate(self):`
    # line directly above this method (a syntax error); it is removed here.
    if self.is_training:
        samples = self.dataset.train_samples
    else:
        samples = self.dataset.test_samples
    n = len(samples)
    tot_size = 0
    for ct, sample in enumerate(samples, start=1):
        spectra = self.create_feature(sample)
        # Feature size in MB (elements * bytes-per-element / 1e6).
        tot_size += spectra.size * spectra.itemsize / 1e6
        print("Calculated " + str(ct / n * 100) + "% of samples, using "
              + str(tot_size) + " MB of disc space...")
def create_all_features_multi_cpu(self):
    """Compute and cache STFT features for every sample in parallel.

    Fans ``self.create_feature`` out over a process pool (one worker per
    CPU) and writes a ``\\rdone xx%`` progress indicator to stderr.
    Feature files are written by ``create_feature`` itself.
    """
    if self.is_training:
        samples = self.dataset.train_samples
    else:
        samples = self.dataset.test_samples
    n = len(samples)
    # Context manager guarantees the pool is cleaned up; the original
    # never closed/joined it and leaked worker processes.
    with multiprocessing.Pool(os.cpu_count()) as pool:
        for i, _ in enumerate(pool.imap_unordered(self.create_feature, samples), 1):
            sys.stderr.write('\rdone {0:%}'.format(i / n))
    # Finish the carriage-return progress line.
    sys.stderr.write('\n')
def generate(self):
    """Yield dataset samples one at a time, ensuring features are cached.

    For each sample dict, loads the cached .npz feature file if present
    (unless ``self.force_feature_recalc``), otherwise computes it via
    ``self.create_feature`` (which writes the cache itself), then yields
    the sample dict.

    Yields
    ------
    dict
        The sample record (the spectra themselves are not yielded).
    """
    # NOTE(review): the diff left old-version lines here referencing
    # undefined `tot_size`/`ct` (a NameError on the cache-miss path) and a
    # duplicate np.load without allow_pickle; both removed.
    if self.is_training:
        samples = self.dataset.train_samples
    else:
        samples = self.dataset.test_samples
    for sample in samples:
        filename = sample['filename']
        # NOTE(review): replaces every "mp3" occurrence in the path, not
        # just the extension — preserved from the original.
        feature_file = filename.replace("mp3", "npz")
        # If feature was already created, load from file.
        if os.path.isfile(feature_file) and not self.force_feature_recalc:
            # NOTE(review): result is unused beyond validating the load —
            # preserved from the original.
            spectra = np.load(feature_file, allow_pickle=True)
        else:
            # Create features via STFT if no cache file exists.
            spectra = self.create_feature(sample)
        yield sample
if __name__ == "__main__":
    # Entry point: build the dataset and precompute all features in
    # parallel. The stale `dg.generate()` call from the old version is
    # removed — it only created a generator object that was never iterated.
    ds = Dataset("/srv/TUG/datasets/birdsong-recognition")
    dg = DataGenerator(ds, None)
    dg.create_all_features_multi_cpu()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment