|
| 1 | +# Copyright 2023–2025 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +base_config: "base.yml" |
| 16 | + |
| 17 | +use_sft: True |
| 18 | +use_multimodal: True |
| 19 | +# For vision inputs the prompt contains images, so we train only on completion tokens |
| 20 | +sft_train_on_completion_only: True |
| 21 | +packing: False # packing is not supported yet |
| 22 | +freeze_vision_encoder_params: True |
| 23 | +learning_rate: 2.e-5 |
| 24 | + |
| 25 | +# -------------- HF pipeline -------------- |
| 26 | +dataset_type: hf |
| 27 | +hf_path: 'NTT-hil-insight/SlideVQA' |
| 28 | +train_split: 'train' |
| 29 | +hf_eval_split: 'val' |
| 30 | +train_data_columns: ['question', 'answer'] # the first column is the prompt, the second column is the completion |
| 31 | +eval_data_columns: ['question', 'answer'] # the first column is the prompt, the second column is the completion |
| 32 | +train_image_column: ['page_1', 'page_2', 'page_3', 'page_4', 'page_5', 'page_6', 'page_7', 'page_8', 'page_9', 'page_10', 'page_11', 'page_12', 'page_13', 'page_14', 'page_15', 'page_16', 'page_17', 'page_18', 'page_19', 'page_20'] # list of image columns |
| 33 | +eval_image_column: ['page_1', 'page_2', 'page_3', 'page_4', 'page_5', 'page_6', 'page_7', 'page_8', 'page_9', 'page_10', 'page_11', 'page_12', 'page_13', 'page_14', 'page_15', 'page_16', 'page_17', 'page_18', 'page_19', 'page_20'] # list of image columns |
0 commit comments