model_args:
  attn_implementation: flash_attention_2
  bnb_4bit_quant_type: nf4
  load_in_4bit: false
  load_in_8bit: false
  lora_alpha: 32
  lora_dropout: 0.05
  lora_modules_to_save: null
  lora_r: 16
  lora_target_modules: null
  lora_task_type: CAUSAL_LM
  model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
  model_revision: main
  torch_dtype: bfloat16
  trust_remote_code: false
  use_bnb_nested_quant: false
  use_dora: false
  use_peft: false
  use_rslora: false
script_args:
  cosine_max_len: 1000
  cosine_max_value_correct: 1.0
  cosine_max_value_wrong: -0.5
  cosine_min_value_correct: 0.5
  cosine_min_value_wrong: 0.0
  dataset_config: null
  dataset_name: simone-papicchio/bird
  dataset_test_split: test
  dataset_train_split: train
  gradient_checkpointing_use_reentrant: false
  ignore_bias_buffers: false
  reward_funcs:
  - qatch_metrics
  - format
  - tag_count
training_args:
  _n_gpu: 1
  accelerator_config:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_configured_state: false
    use_seedable_sampler: true
  adafactor: false
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  add_system_prompt: true
  add_validation: false
  auto_find_batch_size: false
  average_tokens_across_devices: false
  base_db_path: data/bird_train/train_databases
  batch_eval_metrics: false
  benchmarks: []
  beta: 0.04
  bf16: true
  bf16_full_eval: false
  cache_implementation: null
  cached_file_path: /workspaces/deep_thinking/cache_target_sql2execution_BIRD_train.pkl
  callbacks: {}
  chat_template: null
  data_seed: null
  dataloader_drop_last: false
  dataloader_num_workers: 0
  dataloader_persistent_workers: false
  dataloader_pin_memory: true
  dataloader_prefetch_factor: null
  dataset_test_split_name: validation
  ddp_backend: null
  ddp_broadcast_buffers: null
  ddp_bucket_cap_mb: null
  ddp_find_unused_parameters: null
  ddp_timeout: 1800
  debug: []
  deepspeed: null
  disable_tqdm: false
  do_eval: false
  do_predict: false
  do_train: false
  ds3_gather_for_generation: true
  epsilon: 0.2
  epsilon_high: null
  eval_accumulation_steps: null
  eval_delay: 0
  eval_do_concat_batches: true
  eval_on_start: false
  eval_steps: null
  eval_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - 'no'
  eval_use_gather_object: false
  fp16: false
  fp16_backend: auto
  fp16_full_eval: false
  fp16_opt_level: O1
  fsdp: []
  fsdp_config:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
  fsdp_min_num_params: 0
  fsdp_transformer_layer_cls_to_wrap: null
  full_determinism: false
  gradient_accumulation_steps: 16
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  greater_is_better: false
  group_by_length: false
  half_precision_backend: auto
  hub_always_push: false
  hub_model_id: Qwen2.5-1.5B-Open-R1-GRPO
  hub_model_revision: main
  hub_private_repo: null
  hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
  - every_save
  hub_token: null
  ignore_data_skip: false
  include_for_metrics: []
  include_inputs_for_metrics: false
  include_num_input_tokens_seen: false
  include_tokens_per_second: false
  jit_mode_eval: false
  label_names: null
  label_smoothing_factor: 0.0
  learning_rate: 1.0e-06
  length_column_name: length
  load_best_model_at_end: false
  local_rank: 0
  log_completions: true
  log_level: info
  log_level_replica: warning
  log_on_each_node: true
  logging_dir: ./.tensorboard_logging/f5655cd2/
  logging_first_step: true
  logging_nan_inf_filter: true
  logging_steps: 5
  logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  lr_scheduler_kwargs: {}
  lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
  - constant_with_warmup
  max_completion_length: 4096
  max_grad_norm: 0.2
  max_prompt_length: 2048
  max_steps: -1
  metric_for_best_model: loss
  min_p: null
  model_init_kwargs: '{''revision'': ''main'', ''trust_remote_code'': False, ''attn_implementation'': ''flash_attention_2'', ''torch_dtype'': torch.bfloat16, ''use_cache'': False}'
  mp_parameters: ''
  neftune_noise_alpha: null
  no_cuda: false
  num_completions_to_print: 1
  num_generations: 16
  num_iterations: 1
  num_train_epochs: 1.0
  optim: !!python/object/apply:transformers.training_args.OptimizerNames
  - adamw_8bit
  optim_args: null
  optim_target_modules: null
  output_dir: base_models/grpo/Qwen/Qwen2.5-Coder-7B-Instruct/bs_256_ml_4096_gen_16_f5655cd2_RL
  overwrite_hub_revision: false
  overwrite_output_dir: false
  past_index: -1
  per_device_eval_batch_size: 8
  per_device_train_batch_size: 8
  per_gpu_eval_batch_size: null
  per_gpu_train_batch_size: null
  prediction_loss_only: false
  prompt_name: text2sql_model_grpo
  push_to_hub: false
  push_to_hub_model_id: null
  push_to_hub_organization: null
  push_to_hub_revision: false
  push_to_hub_token: null
  ray_scope: last
  ref_model_mixup_alpha: 0.6
  ref_model_sync_steps: 512
  remove_unused_columns: false
  repetition_penalty: 1.0
  report_to:
  - tensorboard
  - wandb
  restore_callback_states_from_checkpoint: false
  resume_from_checkpoint: 'True'
  reward_weights:
  - 0.85
  - 0.1
  - 0.05
  run_name: exp-9-7B-QATCH
  save_on_each_node: false
  save_only_model: false
  save_safetensors: true
  save_steps: 0.1
  save_strategy: !!python/object/apply:transformers.trainer_utils.SaveStrategy
  - steps
  save_total_limit: 3
  scale_rewards: true
  seed: 42
  skip_memory_metrics: true
  stratified_by_complexity: false
  sync_ref_model: false
  temperature: 0.7
  tf32: null
  top_k: 50
  top_p: 1.0
  torch_compile: false
  torch_compile_backend: null
  torch_compile_mode: null
  torch_empty_cache_steps: null
  torchdynamo: null
  tp_size: 0
  tpu_metrics_debug: false
  tpu_num_cores: null
  use_cpu: false
  use_ipex: false
  use_legacy_prediction_loop: false
  use_liger_kernel: false
  use_liger_loss: false
  use_mps_device: false
  use_vllm: true
  validation_split: 0.2
  vllm_device: auto
  vllm_dtype: bfloat16
  vllm_enable_prefix_caching: null
  vllm_gpu_memory_utilization: 0.7
  vllm_guided_decoding_regex: null
  vllm_max_model_len: null
  vllm_server_host: 127.0.0.1
  vllm_server_port: 24879
  vllm_server_timeout: 120.0
  wandb_log_unique_prompts: true
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.0
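
# Effective batch size, worked out from the values above (an inference, not a
# field recorded in the config): per_device_train_batch_size (8) x
# gradient_accumulation_steps (16) = 128 completions per device per optimizer
# step. The bs_256 tag in output_dir is consistent with two data-parallel
# processes (_n_gpu is reported per process), giving 256 completions per step,
# i.e. 256 / num_generations (16) = 16 unique prompts per step.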
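
# Reward shaping: reward_funcs pair positionally with reward_weights, so under
# TRL's reward_weights semantics the scalar reward being optimized is the
# weighted sum
#   r = 0.85 * qatch_metrics + 0.10 * format + 0.05 * tag_count
# The cosine_* values under script_args parameterize a cosine-scaled reward
# that is configured here but not listed in reward_funcs for this run.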
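
# Parsing note: the !!python/object/apply tags above mean this file cannot be
# read with yaml.safe_load; PyYAML's UnsafeLoader (which instantiates the
# referenced transformers enums) is required. A minimal sketch, assuming the
# file is saved as grpo_config.yaml (hypothetical name) and comes from a
# trusted source:
#
#   import yaml
#
#   with open("grpo_config.yaml") as f:
#       cfg = yaml.load(f, Loader=yaml.UnsafeLoader)  # executes the !!python tags
#
#   print(cfg["training_args"]["learning_rate"])  # 1e-06
#   print(cfg["training_args"]["eval_strategy"])  # IntervalStrategy.NO
#
# UnsafeLoader can construct arbitrary Python objects, so never use it on
# untrusted YAML.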