#!/bin/bash
# MLX Server Launcher for Dragon M3 Ultra
# Created: January 2025 for MLX 0.26+
# Supports local/remote models with full parameter control
#
# Interactive wizard: prompts for the model, network settings, sampling
# parameters, an optional LoRA adapter, optional chat-template JSON and a log
# level, then either runs `uv run mlx_lm.server ...` (foreground, background
# with a log file, or under nohup) or copies the command to the clipboard.
#
# The command line is kept in a bash ARRAY and executed directly, never via
# `eval` or `bash -c`. Every answer therefore reaches the server as exactly
# one argument, even when it contains spaces, quotes (e.g. the chat-template
# JSON) or shell metacharacters. Answers are taken literally; the only
# expansion performed is a leading `~` / `~/` on path inputs.

# Text formatting (escape sequences are interpreted by `echo -e`)
BOLD="\033[1m"
BLUE="\033[34m"
GREEN="\033[32m"
YELLOW="\033[33m"
RED="\033[31m"
CYAN="\033[36m"
MAGENTA="\033[35m"
RESET="\033[0m"

#######################################
# Read one line from stdin (prompt "> ") and store it, or a default when the
# answer is empty or stdin is at EOF, in the named global variable.
# Arguments: $1 - name of the variable to set
#            $2 - default value (may be empty)
#######################################
ask() {
  local reply=""
  # -r keeps backslashes in the answer literal.
  read -r -p "> " reply
  printf -v "$1" '%s' "${reply:-$2}"
}

#######################################
# Expand a leading "~" or "~/" to $HOME. `read` never performs tilde
# expansion, and the command is no longer re-parsed by the shell, so this is
# done explicitly for path inputs.
# Arguments: $1 - path as typed by the user
# Outputs:   the expanded path on stdout
#######################################
expand_tilde() {
  local path=$1
  case "$path" in
    "~") path=$HOME ;;
    "~/"*) path="${HOME}/${path#"~/"}" ;;
  esac
  printf '%s' "$path"
}

#######################################
# Report a freshly started background server and record its PID.
# Globals:   SERVER_PID, LOG_FILE (read)
# Outputs:   status lines on stdout; writes logs/mlx-server.pid
#######################################
report_background_start() {
  echo -e "${GREEN}✓ Server started with PID: ${SERVER_PID}${RESET}"
  echo -e "\nTo monitor: tail -f ${LOG_FILE}"
  echo -e "To stop: kill ${SERVER_PID}"
  # Save PID for easy stopping
  printf '%s\n' "$SERVER_PID" > logs/mlx-server.pid
}

# Detect system specs (sysctl keys are macOS-specific; fall back quietly)
TOTAL_MEMORY=$(sysctl -n hw.memsize 2>/dev/null || echo 0)
TOTAL_MEMORY_GB=$((TOTAL_MEMORY / 1073741824))
CPU_BRAND=$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo "Unknown")

# Check if running on M3 Ultra (by CPU brand string, or by >= 400 GB of RAM)
if [[ "$CPU_BRAND" == *"M3 Ultra"* ]] || [[ "$TOTAL_MEMORY_GB" -ge 400 ]]; then
  IS_M3_ULTRA=true
  echo -e "${BOLD}${MAGENTA}🐉 Dragon M3 Ultra detected! (${TOTAL_MEMORY_GB}GB RAM)${RESET}"
else
  IS_M3_ULTRA=false
fi

echo -e "${BOLD}${BLUE}=====================================${RESET}"
echo -e "${BOLD}${BLUE} MLX Server Launcher v1.0 ${RESET}"
echo -e "${BOLD}${BLUE}=====================================${RESET}"
echo -e "Launch MLX model server with custom parameters\n"

# Default values
DEFAULT_MODEL="/Users/polyversai/.lmstudio/models/LibraxisAI/c4ai-command-a-03-2025-q5-mlx"
DEFAULT_HOST="0.0.0.0"
DEFAULT_PORT="12345"
DEFAULT_TEMP="0.7"
DEFAULT_TOP_P="0.95"
DEFAULT_TOP_K="0"
DEFAULT_MIN_P="0.0"
DEFAULT_MAX_TOKENS="2048"
DEFAULT_LOG_LEVEL="INFO"

# Get model path
echo -e "${BOLD}Model path (local or HF repo):${RESET}"
echo -e "(Default: ${DEFAULT_MODEL})"
echo -e "${CYAN}Examples:${RESET}"
echo -e " Local: /Users/polyversai/.lmstudio/models/mlx-community/model-name"
echo -e " HF: mlx-community/Llama-3.2-3B-Instruct-4bit"
ask MODEL_PATH "$DEFAULT_MODEL"
MODEL_PATH=$(expand_tilde "$MODEL_PATH")

# Check if it's a local path; anything that is not an existing directory is
# passed through unchanged and reported as a remote model.
if [[ -d "$MODEL_PATH" ]]; then
  echo -e "${GREEN}✓ Local model detected: ${MODEL_PATH}${RESET}"
else
  echo -e "${GREEN}✓ Remote model specified: ${MODEL_PATH}${RESET}"
fi

# Network configuration
echo -e "\n${BOLD}Host IP address:${RESET}"
echo -e "(Default: ${DEFAULT_HOST} - accessible from network)"
echo -e "Use 127.0.0.1 for localhost only"
ask HOST "$DEFAULT_HOST"

echo -e "\n${BOLD}Port number:${RESET}"
echo -e "(Default: ${DEFAULT_PORT})"
ask PORT "$DEFAULT_PORT"

# Sampling parameters
echo -e "\n${BOLD}${CYAN}=== Sampling Parameters ===${RESET}"

echo -e "\n${BOLD}Temperature (creativity):${RESET}"
echo -e "Range: 0.0-2.0 (Default: ${DEFAULT_TEMP})"
echo -e "${YELLOW}0.0 = deterministic, 1.0 = balanced, 2.0 = very creative${RESET}"
ask TEMP "$DEFAULT_TEMP"

echo -e "\n${BOLD}Top-p (nucleus sampling):${RESET}"
echo -e "Range: 0.0-1.0 (Default: ${DEFAULT_TOP_P})"
echo -e "${YELLOW}Lower = more focused, Higher = more diverse${RESET}"
ask TOP_P "$DEFAULT_TOP_P"

echo -e "\n${BOLD}Top-k (vocabulary limit):${RESET}"
echo -e "Default: ${DEFAULT_TOP_K} (0 = disabled)"
echo -e "${YELLOW}Limits selection to top K tokens${RESET}"
ask TOP_K "$DEFAULT_TOP_K"

echo -e "\n${BOLD}Min-p (minimum probability):${RESET}"
echo -e "Range: 0.0-1.0 (Default: ${DEFAULT_MIN_P})"
echo -e "${YELLOW}0.0 = disabled, higher = filter low probability tokens${RESET}"
ask MIN_P "$DEFAULT_MIN_P"

echo -e "\n${BOLD}Max tokens per response:${RESET}"
echo -e "(Default: ${DEFAULT_MAX_TOKENS})"
if [[ "$IS_M3_ULTRA" == true ]]; then
  echo -e "${MAGENTA}Dragon can handle 8192+ tokens easily${RESET}"
fi
ask MAX_TOKENS "$DEFAULT_MAX_TOKENS"

# Optional adapter
echo -e "\n${BOLD}LoRA adapter path (optional):${RESET}"
echo -e "(Leave empty if not using adapters)"
ask ADAPTER_PATH ""
ADAPTER_PATH=$(expand_tilde "$ADAPTER_PATH")

# Chat template args
echo -e "\n${BOLD}Chat template args (optional JSON):${RESET}"
echo -e "Example: {\"enable_thinking\":false}"
echo -e "(Leave empty for defaults)"
ask CHAT_TEMPLATE_ARGS ""

# Log level
echo -e "\n${BOLD}Log level:${RESET}"
echo -e "(Default: ${DEFAULT_LOG_LEVEL}, Options: DEBUG, INFO, WARNING, ERROR, CRITICAL)"
ask LOG_LEVEL "$DEFAULT_LOG_LEVEL"

# Build the command as an array: one element per argument, no re-parsing.
SERVER_CMD=(
  uv run mlx_lm.server
  --model "$MODEL_PATH"
  --host "$HOST"
  --port "$PORT"
  --temp "$TEMP"
  --top-p "$TOP_P"
  --top-k "$TOP_K"
  --min-p "$MIN_P"
  --max-tokens "$MAX_TOKENS"
  --log-level "$LOG_LEVEL"
)
if [[ -n "$ADAPTER_PATH" ]]; then
  SERVER_CMD+=(--adapter-path "$ADAPTER_PATH")
fi
if [[ -n "$CHAT_TEMPLATE_ARGS" ]]; then
  # The JSON is passed verbatim as a single argument, inner quotes intact.
  SERVER_CMD+=(--chat-template-args "$CHAT_TEMPLATE_ARGS")
fi

# Shell-quoted rendering of the command, safe to paste back into a shell.
printf -v SERVER_CMD_DISPLAY '%q ' "${SERVER_CMD[@]}"
SERVER_CMD_DISPLAY=${SERVER_CMD_DISPLAY% }

# Print preview (printf '%s' so user data is not escape-processed)
echo -e "\n${BOLD}${YELLOW}Command Preview:${RESET}"
printf '%s\n' "$SERVER_CMD_DISPLAY"

# Launch mode selection
echo -e "\n${BOLD}${GREEN}Launch mode:${RESET}"
echo -e "1. ${YELLOW}Foreground${RESET} - See logs in terminal (Ctrl+C to stop)"
echo -e "2. ${YELLOW}Background with logging${RESET} - Logs to logs/mlx-server-<timestamp>.log"
echo -e "3. ${YELLOW}Background detached${RESET} - Run with nohup"
echo -e "4. ${YELLOW}Just copy command${RESET} - Don't launch"
ask LAUNCH_MODE ""

# Create logs directory if needed
if [[ "$LAUNCH_MODE" == "2" || "$LAUNCH_MODE" == "3" ]]; then
  if ! mkdir -p logs; then
    echo -e "${RED}Cannot create logs directory. Exiting.${RESET}" >&2
    exit 1
  fi
  LOG_FILE="logs/mlx-server-$(date +%Y%m%d-%H%M%S).log"
fi

case "$LAUNCH_MODE" in
  1)
    echo -e "\n${BOLD}${GREEN}Starting server in foreground...${RESET}"
    echo -e "${YELLOW}Press Ctrl+C to stop${RESET}\n"
    "${SERVER_CMD[@]}"
    ;;
  2)
    echo -e "\n${BOLD}${GREEN}Starting server in background...${RESET}"
    echo -e "Logs: ${LOG_FILE}"
    "${SERVER_CMD[@]}" > "${LOG_FILE}" 2>&1 &
    SERVER_PID=$!
    report_background_start
    ;;
  3)
    echo -e "\n${BOLD}${GREEN}Starting server with nohup...${RESET}"
    echo -e "Logs: ${LOG_FILE}"
    nohup "${SERVER_CMD[@]}" > "${LOG_FILE}" 2>&1 &
    SERVER_PID=$!
    report_background_start
    ;;
  4)
    # Report success only after pbcopy has actually accepted the text.
    if printf '%s\n' "$SERVER_CMD_DISPLAY" | pbcopy; then
      echo -e "\n${BOLD}${GREEN}Command copied to clipboard!${RESET}"
    else
      echo -e "\n${RED}Could not copy to clipboard (pbcopy failed); use the preview above.${RESET}" >&2
    fi
    ;;
  *)
    echo -e "\n${RED}Invalid choice. Exiting.${RESET}"
    exit 1
    ;;
esac

# Print API examples
if [[ "$LAUNCH_MODE" != "4" ]]; then
  echo -e "\n${BOLD}${BLUE}=== API Usage Examples ===${RESET}"

  echo -e "\n${CYAN}1. Chat completion:${RESET}"
  echo -e "curl http://${HOST}:${PORT}/v1/chat/completions \\"
  echo -e " -H \"Content-Type: application/json\" \\"
  echo -e " -d '{"
  echo -e " \"messages\": [{\"role\": \"user\", \"content\": \"Hello!\"}],"
  echo -e " \"temperature\": ${TEMP},"
  echo -e " \"max_tokens\": 100"
  echo -e " }'"

  echo -e "\n${CYAN}2. Check models:${RESET}"
  echo -e "curl http://${HOST}:${PORT}/v1/models"

  echo -e "\n${CYAN}3. Health check:${RESET}"
  echo -e "curl http://${HOST}:${PORT}/health"

  if [[ "$IS_M3_ULTRA" == true ]]; then
    echo -e "\n${BOLD}${MAGENTA}Dragon Performance Monitoring:${RESET}"
    echo -e "# In another terminal:"
    echo -e "watch -n 1 'curl -s http://${HOST}:${PORT}/health | jq .'"
  fi
fi

echo -e "\n${BOLD}${BLUE}=====================================${RESET}"
echo -e "${BOLD}${GREEN}✨ MLX Server ready!${RESET}"
if [[ "$IS_M3_ULTRA" == true ]]; then
  echo -e "${BOLD}${MAGENTA}🐉 Dragon M3 Ultra serving at full power!${RESET}"
fi
echo -e "${BOLD}${BLUE}=====================================${RESET}"