Spaces:

lcipolina
/

game_reasoning_arena

Running

File size: 12,411 Bytes

#!/usr/bin/env python3
"""
simulate.py

Core simulation logic for a single game simulation.
Handles environment creation, policy initialization, and the simulation loop.
"""

import logging
import sys
from pathlib import Path
from typing import Dict, Any

# Ensure the src directory is in the Python path
current_dir = Path(__file__).parent
src_dir = current_dir / ".." / "src"
sys.path.insert(0, str(src_dir.resolve()))

# pylint: disable=wrong-import-position
from game_reasoning_arena.arena.utils.seeding import set_seed
from game_reasoning_arena.arena.games.registry import registry  # Games registry
from game_reasoning_arena.backends import initialize_llm_registry
from game_reasoning_arena.arena.agents.policy_manager import (
    initialize_policies, policy_mapping_fn
)
from game_reasoning_arena.arena.utils.loggers import SQLiteLogger
from torch.utils.tensorboard import SummaryWriter


logger = logging.getLogger(__name__)


def log_llm_action(agent_id: int,
                   agent_model: str,
                   observation: Dict[str, Any],
                   chosen_action: int,
                   reasoning: str,
                   flag: bool = False
                   ) -> None:
    """Logs the LLM agent's decision."""
    logger.info("Board state: \n%s", observation['state_string'])
    logger.info("Legal actions: %s", observation['legal_actions'])
    logger.info(
        "Agent %s (%s) chose action: %s with reasoning: %s",
        agent_id, agent_model, chosen_action, reasoning
    )
    if flag:
        logger.error("Terminated due to illegal move: %s.", chosen_action)


def compute_actions(env, player_to_agent, observations):
    """
    Computes actions for all agents in the current state.

    Args:
        env: The environment (OpenSpiel env).
        player_to_agent: Dictionary mapping player IDs to agent instances.
        observations: Dictionary of observations for each player.

    Returns:
        Dictionary mapping player IDs to their chosen actions.
        Also stores reasoning in agent objects for later retrieval.
    """

    def extract_action_and_store_reasoning(player_id, agent_response):
        agent = player_to_agent[player_id]
        if isinstance(agent_response, dict) and "action" in agent_response:
            # Store reasoning in the agent object for later retrieval
            if "reasoning" in agent_response:
                agent.last_reasoning = agent_response["reasoning"]
            else:
                agent.last_reasoning = "None"
            return agent_response.get("action", -1)
        else:
            # Fallback for unexpected response formats
            agent.last_reasoning = "None"
            return -1

    if env.state.is_simultaneous_node():
        # Simultaneous-move game: All players act at once
        actions = {}
        for player in player_to_agent:
            agent_response = player_to_agent[player](observations[player])
            actions[player] = extract_action_and_store_reasoning(
                player, agent_response)
        return actions
    else:
        # Turn-based game: Only the current player acts
        current_player = env.state.current_player()
        agent_response = player_to_agent[current_player](
            observations[current_player])
        return {current_player: extract_action_and_store_reasoning(
            current_player, agent_response)}


def simulate_game(game_name: str, config: Dict[str, Any], seed: int) -> str:
    """
    Runs a game simulation, logs agent actions and final rewards to
    TensorBoard.

    Args:
        game_name: The name of the game.
        config: Simulation configuration.
        seed: Random seed for reproducibility.

    Returns:
        str: Confirmation that the simulation is complete.
    """

    # Set global seed for reproducibility across all random number generators
    set_seed(seed)

    # Initialize LLM registry
    initialize_llm_registry()

    # Initialize loggers for all agents
    logger.info("Initializing environment for %s.", game_name)

    # Assign players to their policy classes
    policies_dict = initialize_policies(config, game_name, seed)

    # Initialize loggers and writers for all agents
    agent_loggers_dict = {}
    for agent_id, policy_name in enumerate(policies_dict.keys()):
        # Get agent config and pass it to the logger
        player_key = f"player_{agent_id}"
        default_config = {"type": "unknown", "model": "None"}
        agent_config = config["agents"].get(player_key, default_config)

        # Sanitize model name for filename use
        model_name = agent_config.get("model", "None")
        sanitized_model_name = model_name.replace("-", "_").replace("/", "_")
        agent_loggers_dict[policy_name] = SQLiteLogger(
            agent_type=agent_config["type"],
            model_name=sanitized_model_name
        )
    writer = SummaryWriter(log_dir=f"runs/{game_name}")  # Tensorboard writer

    # Create player_to_agent mapping for RLLib-style action computation
    player_to_agent = {}
    for i, policy_name in enumerate(policies_dict.keys()):
        player_to_agent[i] = policies_dict[policy_name]

    # Loads the pyspiel game and the env simulator
    env = registry.make_env(game_name, config)

    for episode in range(config["num_episodes"]):
        episode_seed = seed + episode
        observation_dict, _ = env.reset(seed=episode_seed)
        terminated = truncated = False
        rewards_dict = {}  # Initialize rewards_dict

        logger.info(
            "Episode %d started with seed %d.", episode + 1, episode_seed
            )
        turn = 0

        while not (terminated or truncated):
            # Use RLLib-style action computation
            try:
                action_dict = compute_actions(
                    env, player_to_agent, observation_dict
                )
            except Exception as e:
                logger.error("Error computing actions: %s", e)
                truncated = True
                break

            # Process each action for logging and validation
            for agent_id, chosen_action in action_dict.items():
                policy_key = policy_mapping_fn(agent_id)
                agent_logger = agent_loggers_dict[policy_key]
                observation = observation_dict[agent_id]

                # Get agent config for logging - ensure we get the right
                # agent's config
                agent_type = None
                agent_model = "None"
                player_key = f"player_{agent_id}"
                if player_key in config["agents"]:
                    agent_config = config["agents"][player_key]
                    agent_type = agent_config["type"]
                    # Only set model for LLM agents
                    if agent_type == "llm":
                        agent_model = agent_config.get("model", "None")
                    else:
                        agent_model = "None"

                # Check if the chosen action is legal
                if (chosen_action is None or
                        chosen_action not in observation["legal_actions"]):
                    logger.error(
                        f"ILLEGAL MOVE DETECTED - Agent {agent_id}: "
                        f"chosen_action={chosen_action} (type: {type(chosen_action)}), "
                        f"legal_actions={observation['legal_actions']}"
                    )
                    if agent_type == "llm":
                        log_llm_action(
                            agent_id, agent_model, observation,
                            chosen_action, "Illegal action", flag=True
                        )
                    agent_logger.log_illegal_move(
                        game_name=game_name, episode=episode + 1, turn=turn,
                        agent_id=agent_id, illegal_action=chosen_action,
                        reason="Illegal action",
                        board_state=observation["state_string"]
                    )
                    truncated = True
                    break

                # Get reasoning if available (for LLM agents)
                reasoning = "None"
                if (agent_type == "llm" and
                        hasattr(player_to_agent[agent_id], 'last_reasoning')):
                    reasoning = getattr(
                        player_to_agent[agent_id], 'last_reasoning', "None"
                    )

                # Logging
                opponents_list = []
                for a_id in config["agents"]:
                    if a_id != f"player_{agent_id}":
                        opp_agent_type = config['agents'][a_id]['type']
                        model = config['agents'][a_id].get('model', 'None')
                        model_clean = model.replace('-', '_')
                        opponents_list.append(f"{opp_agent_type}_{model_clean}")
                opponents = ", ".join(opponents_list)

                agent_logger.log_move(
                    game_name=game_name,
                    episode=episode + 1,
                    turn=turn,
                    action=chosen_action,
                    reasoning=reasoning,
                    opponent=opponents,
                    generation_time=0.0,  # TODO: Add timing back
                    agent_type=agent_type,
                    agent_model=agent_model,
                    seed=episode_seed,
                    board_state=observation["state_string"]
                )

                if agent_type == "llm":
                    log_llm_action(
                        agent_id, agent_model, observation,
                        chosen_action, reasoning
                    )

            # Step forward in the environment
            if not truncated:
                (observation_dict, rewards_dict,
                 terminated, truncated, _) = env.step(action_dict)
                turn += 1

        # Logging
        game_status = "truncated" if truncated else "terminated"
        logger.info(
            "Game status: %s with rewards dict: %s", game_status, rewards_dict
        )

        for agent_id, reward in rewards_dict.items():
            policy_key = policy_mapping_fn(agent_id)
            agent_logger = agent_loggers_dict[policy_key]

            # Calculate opponents for this agent
            opponents_list = []
            for a_id in config["agents"]:
                if a_id != f"player_{agent_id}":
                    opp_agent_type = config['agents'][a_id]['type']
                    opp_model = config['agents'][a_id].get('model', 'None')
                    opp_model_clean = opp_model.replace('-', '_')
                    opponent_str = f"{opp_agent_type}_{opp_model_clean}"
                    opponents_list.append(opponent_str)
            opponents = ", ".join(opponents_list)

            # Log reward to the rewards table
            agent_logger.log_rewards(
                game_name=game_name,
                episode=episode + 1,
                reward=reward
            )

            agent_logger.log_game_result(
                game_name=game_name,
                episode=episode + 1,
                status=game_status,
                reward=reward,
                opponent=opponents
            )
            # Tensorboard logging
            agent_type = "unknown"
            agent_model = "None"

            # Find the agent config by index - handle both string and int keys
            for key, value in config["agents"].items():
                if (key.startswith("player_") and
                        int(key.split("_")[1]) == agent_id):
                    agent_type = value["type"]
                    agent_model = value.get("model", "None")
                    break
                elif str(key) == str(agent_id):
                    agent_type = value["type"]
                    agent_model = value.get("model", "None")
                    break

            tensorboard_key = f"{agent_type}_{agent_model.replace('-', '_')}"
            writer.add_scalar(
                f"Rewards/{tensorboard_key}", reward, episode + 1
            )

        logger.info(
            "Simulation for game %s, Episode %d completed.",
            game_name, episode + 1
        )
    writer.close()
    return "Simulation Completed"

# start tensorboard from the terminal:
# tensorboard --logdir=runs

# In the browser:
# http://localhost:6006/