model_args:
  attn_implementation: flash_attention_2
  bnb_4bit_quant_type: nf4
  load_in_4bit: false
  load_in_8bit: false
  lora_alpha: 32
  lora_dropout: 0.05
  lora_modules_to_save: null
  lora_r: 16
  lora_target_modules: null
  lora_task_type: CAUSAL_LM
  model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
  model_revision: main
  torch_dtype: bfloat16
  trust_remote_code: false
  use_bnb_nested_quant: false
  use_dora: false
  use_peft: false
  use_rslora: false
script_args:
  cosine_max_len: 1000
  cosine_max_value_correct: 1.0
  cosine_max_value_wrong: -0.5
  cosine_min_value_correct: 0.5
  cosine_min_value_wrong: 0.0
  dataset_config: null
  dataset_name: simone-papicchio/bird
  dataset_test_split: test
  dataset_train_split: train
  gradient_checkpointing_use_reentrant: false
  ignore_bias_buffers: false
  reward_funcs:
  - qatch_metrics
  - format
  - tag_count
training_args:
  _n_gpu: 1
  accelerator_config:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_configured_state: false
    use_seedable_sampler: true
  adafactor: false
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  add_system_prompt: true
  add_validation: false
  auto_find_batch_size: false
  average_tokens_across_devices: false
  base_db_path: data/bird_train/train_databases
  batch_eval_metrics: false
  benchmarks: []
  beta: 0.04
  bf16: true
  bf16_full_eval: false
  cache_implementation: null
  cached_file_path: /workspaces/deep_thinking/cache_target_sql2execution_BIRD_train.pkl
  callbacks: {}
  chat_template: null
  data_seed: null
  dataloader_drop_last: false
  dataloader_num_workers: 0
  dataloader_persistent_workers: false
  dataloader_pin_memory: true
  dataloader_prefetch_factor: null
  dataset_test_split_name: validation
  ddp_backend: null
  ddp_broadcast_buffers: null
  ddp_bucket_cap_mb: null
  ddp_find_unused_parameters: null
  ddp_timeout: 1800
  debug: []
  deepspeed: null
  disable_tqdm: false
  do_eval: false
  do_predict: false
  do_train: false
  ds3_gather_for_generation: true
  epsilon: 0.2
  epsilon_high: null
  eval_accumulation_steps: null
  eval_delay: 0
  eval_do_concat_batches: true
  eval_on_start: false
  eval_steps: null
  eval_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - 'no'
  eval_use_gather_object: false
  fp16: false
  fp16_backend: auto
  fp16_full_eval: false
  fp16_opt_level: O1
  fsdp: []
  fsdp_config:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
  fsdp_min_num_params: 0
  fsdp_transformer_layer_cls_to_wrap: null
  full_determinism: false
  gradient_accumulation_steps: 16
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  greater_is_better: false
  group_by_length: false
  half_precision_backend: auto
  hub_always_push: false
  hub_model_id: Qwen2.5-1.5B-Open-R1-GRPO
  hub_model_revision: main
  hub_private_repo: null
  hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
  - every_save
  hub_token: null
  ignore_data_skip: false
  include_for_metrics: []
  include_inputs_for_metrics: false
  include_num_input_tokens_seen: false
  include_tokens_per_second: false
  jit_mode_eval: false
  label_names: null
  label_smoothing_factor: 0.0
  learning_rate: 1.0e-06
  length_column_name: length
  load_best_model_at_end: false
  local_rank: 0
  log_completions: true
  log_level: info
  log_level_replica: warning
  log_on_each_node: true
  logging_dir: ./.tensorboard_logging/f5655cd2/
  logging_first_step: true
  logging_nan_inf_filter: true
  logging_steps: 5
  logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  lr_scheduler_kwargs: {}
  lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
  - constant_with_warmup
  max_completion_length: 4096
  max_grad_norm: 0.2
  max_prompt_length: 2048
  max_steps: -1
  metric_for_best_model: loss
  min_p: null
  model_init_kwargs: '{''revision'': ''main'', ''trust_remote_code'': False, ''attn_implementation'': ''flash_attention_2'', ''torch_dtype'': torch.bfloat16, ''use_cache'': False}'
  mp_parameters: ''
  neftune_noise_alpha: null
  no_cuda: false
  num_completions_to_print: 1
  num_generations: 16
  num_iterations: 1
  num_train_epochs: 1.0
  optim: !!python/object/apply:transformers.training_args.OptimizerNames
  - adamw_8bit
  optim_args: null
  optim_target_modules: null
  output_dir: base_models/grpo/Qwen/Qwen2.5-Coder-7B-Instruct/bs_256_ml_4096_gen_16_f5655cd2_RL
  overwrite_hub_revision: false
  overwrite_output_dir: false
  past_index: -1
  per_device_eval_batch_size: 8
  per_device_train_batch_size: 8
  per_gpu_eval_batch_size: null
  per_gpu_train_batch_size: null
  prediction_loss_only: false
  prompt_name: text2sql_model_grpo
  push_to_hub: false
  push_to_hub_model_id: null
  push_to_hub_organization: null
  push_to_hub_revision: false
  push_to_hub_token: null
  ray_scope: last
  ref_model_mixup_alpha: 0.6
  ref_model_sync_steps: 512
  remove_unused_columns: false
  repetition_penalty: 1.0
  report_to:
  - tensorboard
  - wandb
  restore_callback_states_from_checkpoint: false
  resume_from_checkpoint: 'True'
  reward_weights:
  - 0.85
  - 0.1
  - 0.05
  run_name: exp-9-7B-QATCH
  save_on_each_node: false
  save_only_model: false
  save_safetensors: true
  save_steps: 0.1
  save_strategy: !!python/object/apply:transformers.trainer_utils.SaveStrategy
  - steps
  save_total_limit: 3
  scale_rewards: true
  seed: 42
  skip_memory_metrics: true
  stratified_by_complexity: false
  sync_ref_model: false
  temperature: 0.7
  tf32: null
  top_k: 50
  top_p: 1.0
  torch_compile: false
  torch_compile_backend: null
  torch_compile_mode: null
  torch_empty_cache_steps: null
  torchdynamo: null
  tp_size: 0
  tpu_metrics_debug: false
  tpu_num_cores: null
  use_cpu: false
  use_ipex: false
  use_legacy_prediction_loop: false
  use_liger_kernel: false
  use_liger_loss: false
  use_mps_device: false
  use_vllm: true
  validation_split: 0.2
  vllm_device: auto
  vllm_dtype: bfloat16
  vllm_enable_prefix_caching: null
  vllm_gpu_memory_utilization: 0.7
  vllm_guided_decoding_regex: null
  vllm_max_model_len: null
  vllm_server_host: 127.0.0.1
  vllm_server_port: 24879
  vllm_server_timeout: 120.0
  wandb_log_unique_prompts: true
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.0
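
# Effective batch size, worked out from the values above (an inference, not a
# field recorded in the config): per_device_train_batch_size (8) x
# gradient_accumulation_steps (16) = 128 completions per device per optimizer
# step. The bs_256 tag in output_dir is consistent with two data-parallel
# processes (_n_gpu is reported per process), giving 256 completions per step,
# i.e. 256 / num_generations (16) = 16 unique prompts per step.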
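
# Reward shaping: reward_funcs pair positionally with reward_weights, so under
# TRL's reward_weights semantics the scalar reward being optimized is the
# weighted sum
#   r = 0.85 * qatch_metrics + 0.10 * format + 0.05 * tag_count
# The cosine_* values under script_args parameterize a cosine-scaled reward
# that is configured here but not listed in reward_funcs for this run.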
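
# Parsing note: the !!python/object/apply tags above mean this file cannot be
# read with yaml.safe_load; PyYAML's UnsafeLoader (which instantiates the
# referenced transformers enums) is required. A minimal sketch, assuming the
# file is saved as grpo_config.yaml (hypothetical name) and comes from a
# trusted source:
#
#   import yaml
#
#   with open("grpo_config.yaml") as f:
#       cfg = yaml.load(f, Loader=yaml.UnsafeLoader)  # executes the !!python tags
#
#   print(cfg["training_args"]["learning_rate"])  # 1e-06
#   print(cfg["training_args"]["eval_strategy"])  # IntervalStrategy.NO
#
# UnsafeLoader can construct arbitrary Python objects, so never use it on
# untrusted YAML.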