Commit 859462d0 authored by Alexander Fuchs's avatar Alexander Fuchs
Browse files

Improved feature pre-processing

parent 978bb202
......@@ -140,78 +140,65 @@ class DataGenerator(object):
self.sampling_rate = sampling_rate
self.max_time = max_time
self.force_feature_recalc = force_feature_recalc
def do_stft(self, y, channels):
    """Compute the magnitude STFT of every channel of ``y``.

    ``y`` is indexed as ``y[channel, :]``, so the first axis selects the
    channel. Each of the first ``channels`` rows is transformed with a
    2048-point FFT, a hop of 512 samples, a Hann window and centered
    frames. The absolute value is taken and cast to float32.

    Returns the per-channel magnitudes stacked along a new leading axis.
    """
    magnitudes = [
        np.asarray(
            np.abs(
                librosa.core.stft(
                    y[idx, :],
                    n_fft=2048,
                    hop_length=512,
                    window='hann',
                    center=True,
                )
            ),
            dtype=np.float32,
        )
        for idx in range(channels)
    ]
    return np.stack(magnitudes, axis=0)
def create_feature(self,sample):
"""Creates the features by doing a STFT.

Loads the audio file named by ``sample['filename']`` at
``self.sampling_rate``, cuts or zero-pads the signal relative to
``self.max_time``, computes per-channel STFT magnitudes and stores them
with ``np.savez`` under the filename with "mp3" replaced by "npz".

Returns ``spectra``. The early-exit path returns None when the .npz file
already exists and ``self.force_feature_recalc`` is false, and the
exception handler sets ``spectra`` to None.

NOTE(review): this listing is a rendered diff in which removed and added
lines appear interleaved without +/- markers and without indentation, so
several statements below occur twice (presumably once per side of the
diff). Confirm against the real file before relying on statement order.
"""
try:
# First copy of the load sequence (no cache check); presumably the removed side of the diff.
filename = sample['filename']
channels_str = sample['channels']
# The channel count is the first space-separated token of sample['channels'].
channels = int(channels_str.split(" ")[0])
if channels == 1:
mono = True
else:
mono = False
y, sr = librosa.core.load(filename,mono=mono,sr=self.sampling_rate)
# Second copy of the load sequence, now preceded by a cache check.
filename = sample['filename']
# Skip the work when the .npz file is already on disk, unless recalculation is forced.
# str.replace substitutes every "mp3" occurrence in the path, not only the extension.
if os.path.isfile(filename.replace("mp3","npz")) and not(self.force_feature_recalc):
return None
channels_str = sample['channels']
channels = int(channels_str.split(" ")[0])
if channels == 1:
mono = True
else:
mono = False
y, sr = librosa.core.load(filename,mono=mono,sr=self.sampling_rate)
# mono is a bool; True == 1 holds in Python, so this branch runs for mono files.
# A leading axis is added so y can be indexed as y[channel, :] below.
if mono == 1:
y = np.expand_dims(y,0)
# Length of the signal in seconds.
duration = y.shape[-1]/self.sampling_rate
#If the sample is longer than two times the maximum audio length, the segments are split
if duration > 2*self.max_time:
# Keep the first and the last max_time/2 seconds of the signal.
y_begin = y[:,:int(self.sampling_rate*self.max_time/2)]
y_end = y[:,-int(self.sampling_rate*self.max_time/2):]
# Inline per-channel STFT loops; the same settings appear in self.do_stft calls further down.
spectra_begin = []
#STFT for all channels
for channel in range(channels):
spectrum = np.abs(librosa.core.stft(y_begin[channel,:],
n_fft = 2048,
hop_length = 512,
window = 'hann',
center = True))
spectrum = np.asarray(spectrum,dtype=np.float32)
spectra_begin.append(spectrum)
spectra_begin = np.stack(spectra_begin,axis=0)
spectra_end = []
#STFT for all channels
for channel in range(channels):
spectrum = np.abs(librosa.core.stft(y_end[channel,:],
n_fft = 2048,
hop_length = 512,
window = 'hann',
center = True))
spectrum = np.asarray(spectrum,dtype=np.float32)
spectra_end.append(spectrum)
spectra_end = np.stack(spectra_end,axis=0)
# NOTE(review): repeated mono expansion; presumably a line from the other side of the diff.
if mono == 1:
y = np.expand_dims(y,0)
# Join the begin and end spectra along the last axis.
spectra = np.concatenate([spectra_begin,spectra_end],axis=-1)
else:
#If sample is > max_time and < 2*max time take the beginning of the file
if duration >= self.max_time:
y = y[:,:int(self.sampling_rate*self.max_time)]
duration = y.shape[-1]/self.sampling_rate
#If the sample is longer than two times the maximum audio length, the segments are split
if duration > 2*self.max_time:
y_begin = y[:,:int(self.sampling_rate*self.max_time/2)]
y_end = y[:,-int(self.sampling_rate*self.max_time/2):]
#Transform first and last part of audio
spectra_begin = self.do_stft(y_begin,channels)
spectra_end = self.do_stft(y_end,channels)
spectra = np.concatenate([spectra_begin,spectra_end],axis=-1)
else:
#pad end with zeros
y_tmp = np.zeros((channels,int(self.sampling_rate*self.max_time)))
y_tmp[:,:len(y[0,:])] = y
y = y_tmp
spectra = []
#STFT for all channels
for channel in range(channels):
spectrum = np.abs(librosa.core.stft(y[channel,:],
n_fft = 2048,
hop_length = 512,
window = 'hann',
center = True))
spectrum = np.asarray(spectrum,dtype=np.float32)
spectra.append(spectrum)
spectra = np.stack(spectra,axis=0)
# Append one all-zero column along the last axis.
spectra = np.concatenate([spectra,np.zeros((channels,spectra.shape[1],1))],axis=-1)
# Store the spectra next to the audio file ("mp3" replaced by "npz" in the path).
np.savez(filename.replace("mp3","npz"),spectra)
#If sample is > max_time and < 2*max time take the beginning of the file
if duration >= self.max_time:
y = y[:,:int(self.sampling_rate*self.max_time)]
else:
#pad end with zeros
y_tmp = np.zeros((channels,int(self.sampling_rate*self.max_time)))
y_tmp[:,:len(y[0,:])] = y
y = y_tmp
spectra = self.do_stft(y,channels)
#Pad spectra to match the size of case duration > 2*max_time
spectra = np.concatenate([spectra,np.zeros((channels,spectra.shape[1],1))],axis=-1)
np.savez(filename.replace("mp3","npz"),spectra)
# NOTE(review): a bare except catches every exception; the failure is only printed, not re-raised.
except:
spectra = None
print(sample['filename']+" failed at feature extraction!")
return spectra
def create_all_features(self):
......@@ -233,11 +220,24 @@ class DataGenerator(object):
def create_all_features_multi_cpu(self):
if self.is_training:
samples = self.dataset.train_samples
all_samples = self.dataset.train_samples
else:
samples = self.dataset.test_samples
n = len(samples)
all_samples = self.dataset.test_samples
mp3_filenames = glob.glob(self.dataset.train_audio_path + "/**/*",
recursive = True)
samples = []
for sample in all_samples:
filename = sample['filename']
if not(os.path.isfile(filename.replace("mp3","npz"))) or self.force_feature_recalc:
samples.append(sample)
print(str(len(all_samples)-len(samples))+" feature samples already exist")
n = len(samples)
pool = multiprocessing.Pool(os.cpu_count())
for i, _ in enumerate(pool.imap_unordered(self.create_feature, samples), 1):
sys.stderr.write('\rdone {0:%}'.format(max(0,i/n)))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment