Interface LoadModelConfig

/**
 * Configuration object shape named `LoadModelConfig`.
 *
 * Every member is optional, so an empty object literal satisfies the type.
 * Field names are snake_case and the string literals are reproduced verbatim.
 *
 * NOTE(review): the names look like they mirror llama.cpp context parameters;
 * confirm units, defaults, and valid ranges against the backend before relying
 * on any per-field description below.
 */
interface LoadModelConfig {
    /** Named storage format for the "k" cache (presumably the KV-cache keys — confirm). */
    cache_type_k?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0";

    /** Named storage format for the "v" cache; accepts the same literals as `cache_type_k`. */
    cache_type_v?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0";

    /** Boolean switch; presumably enables embedding output — confirm. */
    embeddings?: boolean;

    /** Numeric batch setting; presumably a token count — confirm. */
    n_batch?: number;

    /** Numeric context setting; presumably the context length in tokens — confirm. */
    n_ctx?: number;

    /** Numeric thread setting; presumably the worker thread count — confirm. */
    n_threads?: number;

    /** Boolean switch; presumably controls offloading of KQV work — confirm. */
    offload_kqv?: boolean;

    /** Pooling mode, chosen from one of four named constants. */
    pooling_type?:
        | "LLAMA_POOLING_TYPE_UNSPECIFIED"
        | "LLAMA_POOLING_TYPE_NONE"
        | "LLAMA_POOLING_TYPE_MEAN"
        | "LLAMA_POOLING_TYPE_CLS";

    /** Numeric RoPE frequency base — semantics not shown here; confirm. */
    rope_freq_base?: number;

    /** Numeric RoPE frequency scale — semantics not shown here; confirm. */
    rope_freq_scale?: number;

    /** RoPE scaling mode, chosen from one of four named constants. */
    rope_scaling_type?:
        | "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED"
        | "LLAMA_ROPE_SCALING_TYPE_NONE"
        | "LLAMA_ROPE_SCALING_TYPE_LINEAR"
        | "LLAMA_ROPE_SCALING_TYPE_YARN";

    /** Numeric seed; presumably for random number generation — confirm. */
    seed?: number;

    /** Numeric YaRN attention factor — semantics not shown here; confirm. */
    yarn_attn_factor?: number;

    /** Numeric YaRN "beta fast" parameter — semantics not shown here; confirm. */
    yarn_beta_fast?: number;

    /** Numeric YaRN "beta slow" parameter — semantics not shown here; confirm. */
    yarn_beta_slow?: number;

    /** Numeric YaRN extrapolation factor — semantics not shown here; confirm. */
    yarn_ext_factor?: number;

    /** Numeric YaRN original-context value; presumably a token count — confirm. */
    yarn_orig_ctx?: number;
}

Properties

cache_type_k?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
cache_type_v?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
embeddings?: boolean
n_batch?: number
n_ctx?: number
n_threads?: number
offload_kqv?: boolean
pooling_type?:
    | "LLAMA_POOLING_TYPE_UNSPECIFIED"
    | "LLAMA_POOLING_TYPE_NONE"
    | "LLAMA_POOLING_TYPE_MEAN"
    | "LLAMA_POOLING_TYPE_CLS"
rope_freq_base?: number
rope_freq_scale?: number
rope_scaling_type?:
    | "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED"
    | "LLAMA_ROPE_SCALING_TYPE_NONE"
    | "LLAMA_ROPE_SCALING_TYPE_LINEAR"
    | "LLAMA_ROPE_SCALING_TYPE_YARN"
seed?: number
yarn_attn_factor?: number
yarn_beta_fast?: number
yarn_beta_slow?: number
yarn_ext_factor?: number
yarn_orig_ctx?: number