---
# Training configuration (Hydra-style: `_target_` + `${...}` interpolation).
# NOTE(review): the source arrived collapsed onto one physical line, which is
# not parseable YAML; the block structure below is reconstructed from key
# order and naming — confirm nesting against the consuming code.
training:
  run_name: example_config
  epochs: 150
  learning_rate: 0.0001
  log_interval: 100
  checkpoint_dir: null
  resume_from: null
  n_iterative_pseudolabeling: 3
  lr_scheduler:
    # Instantiated by Hydra; T_max tracks the epoch count via interpolation.
    _target_: torch.optim.lr_scheduler.CosineAnnealingLR
    T_max: ${training.epochs}
    eta_min: 1.0e-06
  # NOTE(review): assumed to be a sibling of lr_scheduler (training-level
  # switch; -1 presumably means "start immediately / disabled") — confirm.
  lr_scheduler_start_epoch: -1

dataset:
  root: data/LibriSpeech
  train_split: train-clean-100
  test_split: test-clean
  batch_size: 32
  num_workers: 1
  noise_dir: noise_fullband
  max_audio_length: 160000

augmentations:
  max_augs: 4  # in all our experiments we used 4
  time_stretch: true
  pitch_shift: true
  reverberation: true
  noise: true
  rir_dir: data/rirs
  activate_extra_augs: true
  # NOTE(review): the per-effect maps below are assumed to be the "extra
  # augs" gated by activate_extra_augs — verify against the loader.
  echo:
    enabled: true
    volume_range:
      - 0.1
      - 0.5
    duration_range:
      - 0.1
      - 0.5
  random_noise:
    enabled: true
    noise_std: 0.001
  pink_noise:
    enabled: true
    noise_std: 0.01
  lowpass_filter:
    enabled: true
    cutoff_freq: 5000
  highpass_filter:
    enabled: true
    cutoff_freq: 500
  bandpass_filter:
    enabled: true
    cutoff_freq_low: 300
    cutoff_freq_high: 8000
  smooth:
    enabled: true
    window_size_range:
      - 2
      - 10
  boost_audio:
    enabled: true
    amount: 20
  duck_audio:
    enabled: true
    amount: 20
  updownresample:
    enabled: true
    intermediate_freq: 32000

model:
  name: hubert-base-ls960
  layer: 6
  vocab_size: 500
  kind_kmeans: kmeans
  quantizer:
    hidden_dim: 256