Source code for yawning_titan.envs.generic.helpers.eval_printout

"""
Util to print out agent evaluation metrics.

The metrics printed out are:
    - Total episodes elapsed
    - Absolute wins for red and blue
    - Percentage win rate for red and blue
    - Average episode length
    - Actions taken by blue each game/Average actions taken by blue over n games
"""

from collections import Counter, defaultdict
from typing import List, Tuple

from tabulate import tabulate


class EvalPrintout:
    """Class to represent an Eval Printer that reports averaged game stats to the console."""

    def __init__(self, avg_every: int):
        """
        Initialise printout object.

        Args:
            avg_every: Number of games to average stats over

        Raises:
            ValueError: If avg_every is not an integer or is less than 1
        """
        # Check the type before comparing so that non-numeric input (e.g. a string or None)
        # is reported as a ValueError instead of escaping as a TypeError from the comparison
        if not isinstance(avg_every, int):
            raise ValueError("avg_every must be an integer")
        if avg_every < 1:
            raise ValueError("avg_every must be greater than or equal to 1")
        self.avg_every = avg_every

    def print_stats(self, game_stats_list: List[dict], total_games: int):
        """
        Print out the (averaged) stats from the last avg_every number of games to the console.

        Args:
            game_stats_list: List of dictionaries containing the last avg_every number of game stats
            total_games: Total games played since starting
        """
        print("--Game over--")
        print("Total number of Games Played: ", total_games)

        # Calculate average metrics from the list of individual game metrics
        (
            blue_wins,
            red_wins,
            percentage_blue,
            percentage_red,
            avg_duration,
            avg_actions,
        ) = self.calculate_metrics(game_stats_list)

        # If printing every game, no need to print blue/red win ratio
        if self.avg_every == 1:
            last_game = game_stats_list[-1]
            print(last_game["Winner"], "wins!")
            print("Episode length: ", last_game["Duration"])
        # If printing every avg_every games, use different messages and print blue/red win ratio
        else:
            print(f"Stats over the last {self.avg_every} games:")
            print("Average episode length: ", avg_duration, "\n")
            print(
                tabulate(
                    [
                        (blue_wins, red_wins),
                        (f"{percentage_blue}%", f"{percentage_red}%"),
                    ],
                    headers=["Blue Won", "Red Won"],
                )
            )
            print("\n")

        # Print actions used by blue; each entry is (action, [average usage, usage percentage])
        print(
            tabulate(
                [(action, avg, f"{pct}%") for action, (avg, pct) in avg_actions],
                headers=["Action", "Avg Times Used", "Percentage of Action Usage"],
            )
        )
        print("\n\n")

    def calculate_metrics(
        self, game_stats_list: List[dict]
    ) -> Tuple[int, int, float, float, int, list]:
        """
        Calculate the metrics to be printed.

        Every key of a game dictionary other than "Winner" and "Duration" is treated as an action
        name whose value is the number of times blue used that action in the game.

        Args:
            game_stats_list: List of dictionaries containing the last avg_every number of game stats

        Returns:
            blue_wins: Number of games blue won in the last avg_every number of games
            red_wins: Number of games not won by blue in the last avg_every number of games
            percentage_blue: Percentage of games blue won in the last avg_every number of games
            percentage_red: Percentage of games red won in the last avg_every number of games
            avg_duration: Average number of timesteps per episode over the last avg_every number of games,
                          rounded to the nearest integer
            sorted_actions: List of (action, [average frequency of action, action usage percentage]) tuples
                            for the actions taken by blue, ordered from highest to lowest average
                            frequency (ties broken by usage percentage)
        """
        non_action_keys = ("Winner", "Duration")

        # Count how many times blue and red won; any winner other than "blue" counts for red
        blue_wins = sum(1 for game in game_stats_list if game["Winner"] == "blue")
        red_wins = len(game_stats_list) - blue_wins

        # Calculate blue/red win ratios
        percentage_blue = round((blue_wins / self.avg_every) * 100, 2)
        percentage_red = round((red_wins / self.avg_every) * 100, 2)

        # Calculate the average number of timesteps that episodes last for
        total_duration = sum(game["Duration"] for game in game_stats_list)
        avg_duration = round(total_duration / self.avg_every)

        # Accumulate how often blue used each action across all games. In-place Counter addition
        # is used deliberately: it discards actions whose running total is not positive, so
        # actions that were never used do not appear in the output.
        cumulative_actions = Counter()
        for game in game_stats_list:
            cumulative_actions += {
                k: v for k, v in game.items() if k not in non_action_keys
            }

        # Calculate blue's average usage for each action
        avg_actions = {
            k: round(v / self.avg_every) for k, v in cumulative_actions.items()
        }

        # Calculate percentage of blue's action usage from the raw totals. Deriving it from the
        # rounded averages would distort the shares (an average of 0.4 rounds to 0 and would be
        # reported as 0% usage).
        total_actions = sum(cumulative_actions.values()) or 1  # guard against division by zero
        percentage_actions = {
            k: round((v / total_actions) * 100, 2)
            for k, v in cumulative_actions.items()
        }

        # Combine average usage and usage percentage per action, then sort the actions in order
        # from highest average usage to lowest
        combined_actions = [
            (k, [avg_actions[k], percentage_actions[k]]) for k in cumulative_actions
        ]
        sorted_actions = sorted(
            combined_actions, key=lambda item: item[1], reverse=True
        )

        return (
            blue_wins,
            red_wins,
            percentage_blue,
            percentage_red,
            avg_duration,
            sorted_actions,
        )