--n_epochs = 100 --patience = -1 --mbatch_size = 64 --lr = 0.001 --lr_scheduler = null --lr_scheduler_kwargs = {} --param_lr_map = {} --grad_clip = "norm" --grad_clip_kwargs = {"max_norm": 5} --regularization = 0.0 --l1_reg = 0.0 --l2_reg = 0.0001 --optimizer = "Adam" --loss = "L1Loss" --initializer = null --initialization_seed = 0 --batch_shuffle_seed = 0