Interface LoadModelParams

/**
 * Options bag for loading a model. Every property is optional.
 *
 * NOTE(review): the property names and the `LLAMA_*` literal values look like
 * they mirror llama.cpp model/context parameters — confirm against the code
 * that consumes this object. Defaults applied when a property is omitted are
 * not visible from this declaration.
 */
interface LoadModelParams {
    /** One of the listed type tags. NOTE(review): presumably the K-cache element type — confirm. */
    cache_type_k?:
        | "f32"
        | "f16"
        | "q8_0"
        | "q5_1"
        | "q5_0"
        | "q4_1"
        | "q4_0";
    /** Accepts the same set of type tags as `cache_type_k`. NOTE(review): presumably the V-cache element type — confirm. */
    cache_type_v?:
        | "f32"
        | "f16"
        | "q8_0"
        | "q5_1"
        | "q5_0"
        | "q4_1"
        | "q4_0";
    /** NOTE(review): unclear from here whether this is a template name or template source text — confirm. */
    chat_template?: string;
    embeddings?: boolean;
    flash_attn?: boolean;
    jinja?: boolean;
    /** `LogLevel` is declared outside this block. */
    log_level?: LogLevel;
    n_batch?: number;
    n_ctx?: number;
    n_gpu_layers?: number;
    n_threads?: number;
    offload_kqv?: boolean;
    /** One of the four listed `LLAMA_POOLING_TYPE_*` string tags. */
    pooling_type?:
        | "LLAMA_POOLING_TYPE_UNSPECIFIED"
        | "LLAMA_POOLING_TYPE_NONE"
        | "LLAMA_POOLING_TYPE_MEAN"
        | "LLAMA_POOLING_TYPE_CLS";
    reasoning?: boolean;
    rope_freq_base?: number;
    rope_freq_scale?: number;
    /** One of the four listed `LLAMA_ROPE_SCALING_TYPE_*` string tags. */
    rope_scaling_type?:
        | "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED"
        | "LLAMA_ROPE_SCALING_TYPE_NONE"
        | "LLAMA_ROPE_SCALING_TYPE_LINEAR"
        | "LLAMA_ROPE_SCALING_TYPE_YARN";
    seed?: number;
    swa_full?: boolean;
    // NOTE(review): the yarn_* fields presumably only matter when
    // rope_scaling_type is "LLAMA_ROPE_SCALING_TYPE_YARN" — confirm.
    yarn_attn_factor?: number;
    yarn_beta_fast?: number;
    yarn_beta_slow?: number;
    yarn_ext_factor?: number;
    yarn_orig_ctx?: number;
}

Properties

cache_type_k?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
cache_type_v?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
chat_template?: string
embeddings?: boolean
flash_attn?: boolean
jinja?: boolean
log_level?: LogLevel
n_batch?: number
n_ctx?: number
n_gpu_layers?: number
n_threads?: number
offload_kqv?: boolean
pooling_type?:
    | "LLAMA_POOLING_TYPE_UNSPECIFIED"
    | "LLAMA_POOLING_TYPE_NONE"
    | "LLAMA_POOLING_TYPE_MEAN"
    | "LLAMA_POOLING_TYPE_CLS"
reasoning?: boolean
rope_freq_base?: number
rope_freq_scale?: number
rope_scaling_type?:
    | "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED"
    | "LLAMA_ROPE_SCALING_TYPE_NONE"
    | "LLAMA_ROPE_SCALING_TYPE_LINEAR"
    | "LLAMA_ROPE_SCALING_TYPE_YARN"
seed?: number
swa_full?: boolean
yarn_attn_factor?: number
yarn_beta_fast?: number
yarn_beta_slow?: number
yarn_ext_factor?: number
yarn_orig_ctx?: number