Optional cache_type_k
cache_type_k?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
Optional cache_type_v
cache_type_v?: "f32" | "f16" | "q8_0" | "q5_1" | "q5_0" | "q4_1" | "q4_0"
Optional embeddings
embeddings?: boolean
Optional flash_attn
flash_attn?: boolean
Optional n_batch
n_batch?: number
Optional n_ctx
n_ctx?: number
Optional n_threads
n_threads?: number
Optional offload_kqv
offload_kqv?: boolean
Optional pooling_type
pooling_type?:
| "LLAMA_POOLING_TYPE_UNSPECIFIED"
| "LLAMA_POOLING_TYPE_NONE"
| "LLAMA_POOLING_TYPE_MEAN"
| "LLAMA_POOLING_TYPE_CLS"
Optional rope_freq_base
rope_freq_base?: number
Optional rope_freq_scale
rope_freq_scale?: number
Optional rope_scaling_type
rope_scaling_type?:
| "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED"
| "LLAMA_ROPE_SCALING_TYPE_NONE"
| "LLAMA_ROPE_SCALING_TYPE_LINEAR"
| "LLAMA_ROPE_SCALING_TYPE_YARN"
Optional seed
seed?: number
Optional yarn_attn_factor
yarn_attn_factor?: number
Optional yarn_beta_fast
yarn_beta_fast?: number
Optional yarn_beta_slow
yarn_beta_slow?: number
Optional yarn_ext_factor
yarn_ext_factor?: number
Optional yarn_orig_ctx
yarn_orig_ctx?: number