Optional
cache_type_k
cache_type_k?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
Optional
cache_type_v
cache_type_v?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
Optional
embeddings
embeddings?: boolean
Optional
n_batch
n_batch?: number
Optional
n_ctx
n_ctx?: number
Optional
n_threads
n_threads?: number
Optional
offload_kqv
offload_kqv?: boolean
Optional
pooling_type
pooling_type?:
| "LLAMA_POOLING_TYPE_UNSPECIFIED"
| "LLAMA_POOLING_TYPE_NONE"
| "LLAMA_POOLING_TYPE_MEAN"
| "LLAMA_POOLING_TYPE_CLS"
Optional
rope_freq_base
rope_freq_base?: number
Optional
rope_freq_scale
rope_freq_scale?: number
Optional
rope_scaling_type
rope_scaling_type?:
| "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED"
| "LLAMA_ROPE_SCALING_TYPE_NONE"
| "LLAMA_ROPE_SCALING_TYPE_LINEAR"
| "LLAMA_ROPE_SCALING_TYPE_YARN"
Optional
seed
seed?: number
Optional
yarn_attn_factor
yarn_attn_factor?: number
Optional
yarn_beta_fast
yarn_beta_fast?: number
Optional
yarn_beta_slow
yarn_beta_slow?: number
Optional
yarn_ext_factor
yarn_ext_factor?: number
Optional
yarn_orig_ctx
yarn_orig_ctx?: number