{
  "llm_model": "llm.mnn",
  "llm_weight": "llm.mnn.weight",
  "embedding_file": "embeddings_int4.bin",
  "backend_type": "cpu",
  "thread_num": 4,
  "precision": "low",
  "memory": "low",
  "mllm": {
    "backend_type": "cpu",
    "thread_num": 4,
    "precision": "low",
    "memory": "low"
  },
  "sampler_type": "mixed",
  "mixed_samplers": [
    "penalty",
    "topK",
    "topP",
    "min_p",
    "temperature"
  ],
  "penalty": 1.0,
  "temperature": 1.0,
  "topP": 0.95,
  "topK": 20,
  "min_p": 0,
  "max_new_tokens": 40960
}