Spaces:
Running
Running
from characters import get_character | |
from modules.llm import get_llm_model | |
from time import time | |
if __name__ == "__main__": | |
supported_llms = [ | |
# "MiniMaxAI/MiniMax-Text-01", | |
# "Qwen/Qwen3-8B", | |
# "Qwen/Qwen3-30B-A3B", | |
# "meta-llama/Llama-3.1-8B-Instruct", | |
# "tiiuae/Falcon-H1-1B-Base", | |
# "tiiuae/Falcon-H1-3B-Instruct", | |
# "google/gemma-2-2b", | |
# "gemini-2.5-flash", | |
] | |
character_prompt = get_character("Yaoyin").prompt | |
for model_id in supported_llms: | |
try: | |
print(f"Loading model: {model_id}") | |
llm = get_llm_model(model_id, cache_dir="./.cache") | |
prompt = "你好,今天你心情怎么样?" | |
start_time = time() | |
result = llm.generate(prompt, system_prompt=character_prompt) | |
end_time = time() | |
print(f"[{model_id}] LLM inference time: {end_time - start_time:.2f} seconds") | |
print(f"[{model_id}] LLM inference result:", result) | |
except Exception as e: | |
print(f"Failed to load model {model_id}: {e}") | |
breakpoint() | |
continue | |