from __future__ import annotations
from typing import Optional
from yawning_titan.config.core import ConfigGroup
from yawning_titan.config.item_types.bool_item import BoolItem, BoolProperties
from yawning_titan.config.item_types.int_item import IntItem, IntProperties
from yawning_titan.config.item_types.str_item import StrItem, StrProperties
# --- Tier 0 groups
class Rewards(ConfigGroup):
"""The rewards the blue agent receives based upon the final game state."""
    def __init__(
self,
for_loss: Optional[int] = 0,
for_reaching_max_steps: Optional[int] = 0,
end_rewards_are_multiplied_by_end_state: Optional[bool] = False,
reduce_negative_rewards_for_closer_fails: Optional[bool] = False,
function: Optional[str] = "standard_rewards",
):
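        # Imported here rather than at module level, presumably to avoid a circular import.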
from yawning_titan.envs.generic.core import reward_functions
doc = "The rewards the blue agent gets for different game states"
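        # Each setting is wrapped in a config item so it carries its own doc string, validation properties, and an alias for the corresponding config-file key.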
self.for_loss = IntItem(
value=for_loss,
doc="Rewards for the blue agent losing",
properties=IntProperties(allow_null=True, default=0),
alias="rewards_for_loss",
)
self.for_reaching_max_steps = IntItem(
value=for_reaching_max_steps,
doc="Rewards for the blue agent winning by reaching the maximum number of steps",
properties=IntProperties(allow_null=True, default=0),
alias="rewards_for_reaching_max_steps",
)
self.end_rewards_are_multiplied_by_end_state = BoolItem(
value=end_rewards_are_multiplied_by_end_state,
doc="How good the end state is (what % blue controls) is multiplied by the rewards that blue receives for winning",
properties=BoolProperties(allow_null=False, default=False),
alias="end_rewards_are_multiplied_by_end_state",
)
self.reduce_negative_rewards_for_closer_fails = BoolItem(
value=reduce_negative_rewards_for_closer_fails,
doc="The negative rewards from the red agent winning are reduced the closer to the end the blue agent gets",
properties=BoolProperties(allow_null=False, default=False),
alias="reduce_negative_rewards_for_closer_fails",
)
self.function: StrItem = StrItem(
value=function,
            doc=(
                "There are several built-in example reward methods that you can choose from (listed below). "
                "You can also create your own reward method by copying one of the built-in methods and referencing it here. "
                "Built-in reward methods: standard_rewards, one_per_timestep, safe_nodes_give_rewards, punish_bad_actions"
            ),
properties=StrProperties(
default="standard_rewards",
options=list(reward_functions.__dict__.keys()),
),
alias="reward_function",
)
super().__init__(doc)
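

# --- Usage sketch (not part of the library): a minimal example assuming that
# config items expose a ``.value`` attribute mirroring the ``value=`` kwarg
# passed to them above; names outside this file are not verified here.
if __name__ == "__main__":
    rewards = Rewards(for_loss=-100, for_reaching_max_steps=50)
    print(rewards.for_loss.value)   # -100
    print(rewards.function.value)   # "standard_rewards"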