# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

# Training/eval config for BLIP-2 FlanT5-XXL with an instruction-conditioned
# Prompt-MoE Q-Former. Vision encoder and LLM are frozen; the Q-Former and the
# T5 projection are trained.
model:
  arch: blip2_t5_instruct_pro_moe
  model_type: flant5xxl
  load_finetuned: false
  load_pretrained: true

  vit_model: eva_clip_g
  pretrained: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/blip2/blip2-flant5-xxl/blip2_pretrained_flant5xxl.pth"
  finetuned: ""

  # vit encoder
  image_size: 224
  drop_path_rate: 0
  use_grad_checkpoint: false
  vit_precision: "fp16"

  # Q-Former
  num_query_token: 32
  qformer_text_input: true

  # T5
  t5_model: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/google-flan-t5-xxl"
  prompt: ""
  max_txt_len: 256
  max_output_txt_len: 256

  # freeze
  freeze_vit: true
  freeze_llm: true
  freeze_qformer: false
  freeze_t5_proj: false

  # moe
  moe_position: "pre"  # post (position to insert PromptMoE Part)
  embed_extract: "blip2_pretrain"  # t5, random (way to extract embeddings of task instruction if moe_position is pre)
  repeat_to_init_qt_candidates: true
  num_qt_candidates: 20
  moe_topk: 2
  eval_gate_save: false
  train_gate_save: false
  gate_save_path: "/mnt/pfs-guan-ssai/nlu/wanghanzi/experiments/blip2/flant5xxl/prompt_moe/llava_st_257k_raw_train_qf_train_qt_linear_gate_textt5_20ex_3loss_textinqf_epo3_1012/"

preprocess:
  vis_processor:
    train:
      name: "blip_image_train"
      image_size: 224
    eval:
      name: "blip_image_eval"
      image_size: 224
  text_processor:
    train:
      name: "blip_caption"
    eval:
      name: "blip_caption"