"""
A collection of reward functions used by the generic network environment.
You can select the reward function that you wish to use in the config file under settings.
The reward functions take in a parameter called args. args is a dictionary that contains the
following information:
-network_interface: Interface with the network
-blue_action: The action that the blue agent has taken this turn
-blue_node: The node that the blue agent has targeted for their action
-start_state: The state of the nodes before the blue agent has taken their action
-end_state: The state of the nodes after the blue agent has taken their action
-start_vulnerabilities: The vulnerabilities before the blue agent's turn
-end_vulnerabilities: The vulnerabilities after the blue agent's turn
-start_isolation: The isolation status of all the nodes at the start of a turn
-end_isolation: The isolation status of all the nodes at the end of a turn
-start_blue: The environment as the blue agent sees it before the blue agent's turn
-end_blue: The environment as the blue agent sees it after the blue agent's turn
The reward function returns a single number (integer or float) that is the blue agent's reward for that turn.
"""
# Functions:
from __future__ import annotations
import math
from yawning_titan.envs.generic.core.network_interface import NetworkInterface
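# Lookup table of rewards for removing red control, indexed by the percentage
# of nodes (0-100) still compromised after blue's turn; decays from 1.0 to roughly 0.67.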
REMOVE_RED_POINTS = [round(math.exp(-0.004 * i), 4) for i in range(0, 101)]
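# Precomputed rewards for reducing node vulnerability (rises from roughly 0.55
# to 0.70); currently not referenced by the reward functions below.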
REDUCE_VULNERABILITY_POINTS = [
    2 / (10 + math.exp(4 - 10 * (i / 20))) + 0.5 for i in range(1, 20)
]
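# Precomputed rewards for scanning (rises from 0 towards 1); currently not
# referenced by the reward functions below.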
SCANNING_USAGE_POINTS = [-math.exp(-i) + 1 for i in range(0, 100)]
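# Illustrative sketch (not part of the library): a minimal custom reward
# function that follows the args contract documented above. It only reads the
# documented "end_state" and "blue_action" entries; the function name and the
# per-action cost are hypothetical.
def example_custom_reward(args: dict) -> float:
    """Give one point per safe node, minus a small cost for restoring nodes."""
    end_state = args["end_state"]
    blue_action = args["blue_action"]
    # one point for every node that is not compromised at the end of the turn
    reward = float(len(end_state) - sum(end_state.values()))
    if blue_action == "restore_node":
        reward -= 1  # illustrative action cost
    return reward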
def standard_rewards(args: dict) -> float:
"""
Calculate the reward for the current state of the environment.
Actions cost a certain amount, and blue is rewarded for removing red nodes and
reducing the vulnerability of nodes.
Args:
args: A dictionary containing the following items:
network_interface: Interface with the network
blue_action: The action that the blue agent has taken this turn
blue_node: The node that the blue agent has targeted for their action
start_state: The state of the nodes before the blue agent has taken their action
end_state: The state of the nodes after the blue agent has taken their action
start_vulnerabilities: The vulnerabilities before the blue agent's turn
end_vulnerabilities: The vulnerabilities after the blue agent's turn
start_isolation: The isolation status of all the nodes at the start of a turn
end_isolation: The isolation status of all the nodes at the end of a turn
start_blue: The environment as the blue agent sees it before the blue agent's turn
end_blue: The environment as the blue agent sees it after the blue agent's turn
Returns:
The reward earned for this specific turn for the blue agent
"""
# Get information about the current state of the environment
network_interface: NetworkInterface = args["network_interface"]
blue_action = args["blue_action"]
start_state = args["start_state"]
end_state = args["end_state"]
start_vulnerabilities = args["start_vulnerabilities"]
end_vulnerabilities = args["end_vulnerabilities"]
start_isolation = args["start_isolation"]
end_isolation = args["end_isolation"]
start_blue = args["start_blue"]
end_blue = args["end_blue"]
# cost for actions
action_cost = {
"reduce_vulnerability": 0.5,
"restore_node": 1,
"make_node_safe": 0.5,
"scan": 0,
"isolate": 1,
"connect": 0,
"do_nothing": -0.5,
"add_deceptive_node": 8,
}
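# note: "do_nothing" has a negative cost, so negating it below yields a small
# positive base reward before any penalties are applied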
# prevent the isolate cost from being charged twice (it is applied per isolated node below)
reward = -action_cost[blue_action] if blue_action != "isolate" else 0
# punish agent for every node it has isolated
reward += -action_cost["isolate"] * sum(end_isolation.values())
# calculating number of red nodes before and after the blue agent's turn
initial_cumulative_states = sum(start_state.values())
final_cumulative_states = sum(end_state.values())
# punish agent for doing nothing if there are large numbers of red-controlled nodes in the environment
if blue_action == "do_nothing":
reward = reward - (0.2 * final_cumulative_states)
if blue_action == "connect":
if sum(end_isolation.values()) < sum(start_isolation.values()):
reward += 5
else:
reward -= 5
# rewards for removing red nodes
if initial_cumulative_states > final_cumulative_states:
reward += REMOVE_RED_POINTS[
round(
100
* final_cumulative_states
/ network_interface.current_graph.number_of_nodes()
)
]
# punish agent for taking any action other than fixing nodes when a large proportion of the network is red-controlled
if blue_action != "make_node_safe" and blue_action != "restore_node":
amount = (
final_cumulative_states / network_interface.current_graph.number_of_nodes()
)
if amount > 0.3:
reward = reward - amount + 0.3
# punish the blue agent for patching nodes that are already safe
if blue_action == "make_node_safe" or blue_action == "restore_node":
if initial_cumulative_states == final_cumulative_states:
reward = reward - 3
# punish the blue agent for reducing the vulnerability of a node that is already at minimum vulnerability
if blue_action == "reduce_vulnerability" or blue_action == "restore_node":
if sum(start_vulnerabilities.values()) == sum(end_vulnerabilities.values()):
reward = reward - 0.5
# reward for revealing red
if blue_action == "scan":
number = 0
for node, value in end_blue.items():
if value == 1 and start_blue[node] == 0:
if start_state[node] == 1:
number += 1
if number >= 5:
reward += 2.5
else:
reward += number * 0.5
# rewards for reducing node vulnerabilities
if (
network_interface.game_mode.red.agent_attack.ignores_defences.value is False
and blue_action == "reduce_vulnerability"
):
initial_cumulative_vuln = sum(start_vulnerabilities.values())
final_cumulative_vuln = sum(end_vulnerabilities.values())
reward = reward + (initial_cumulative_vuln - final_cumulative_vuln) * 4
if blue_action == "add_deceptive_node":
if network_interface.reached_max_deceptive_nodes:
reward -= 5
return reward
def experimental_rewards(args: dict) -> float:
"""
Calculate the reward for the current state of the environment.
Actions cost a certain amount, and blue is rewarded for removing red nodes and
reducing the vulnerability of nodes.
Args:
args: A dictionary containing the following items:
network_interface: Interface with the network
blue_action: The action that the blue agent has taken this turn
blue_node: The node that the blue agent has targeted for their action
start_state: The state of the nodes before the blue agent has taken their action
end_state: The state of the nodes after the blue agent has taken their action
start_vulnerabilities: The vulnerabilities before the blue agent's turn
end_vulnerabilities: The vulnerabilities after the blue agent's turn
start_isolation: The isolation status of all the nodes at the start of a turn
end_isolation: The isolation status of all the nodes at the end of a turn
start_blue: The environment as the blue agent sees it before the blue agent's turn
end_blue: The environment as the blue agent sees it after the blue agent's turn
Returns:
The reward earned for this specific turn for the blue agent
"""
# Get information about the current state of the environment
network_interface: NetworkInterface = args["network_interface"]
blue_action = args["blue_action"]
start_state = args["start_state"]
end_state = args["end_state"]
start_blue = args["start_blue"]
end_blue = args["end_blue"]
# cost for actions
action_cost = {
"reduce_vulnerability": 0,
"restore_node": 1,
"make_node_safe": 0,
"scan": 0,
"isolate": 10,
"connect": 0,
"do_nothing": -0.5,
"add_deceptive_node": 2,
}
reward = -action_cost[blue_action]
# calculating number of red nodes before and after the blue agent's turn
initial_cumulative_states = sum(start_state.values())
final_cumulative_states = sum(end_state.values())
# calculating the number of compromised nodes, as seen by the blue agent,
# before and after the blue agent's turn
initial_blue_states = sum(start_blue.values())
final_blue_states = sum(end_blue.values())
# rewards for removing red nodes
# gives between roughly 0.67 and 1
if blue_action == "make_node_safe" or blue_action == "restore_node":
if initial_blue_states > final_blue_states:
reward += REMOVE_RED_POINTS[
round(
100
* final_cumulative_states
/ network_interface.current_graph.number_of_nodes()
)
]
elif initial_cumulative_states > final_cumulative_states:
pass
else:
reward -= 1
# punish agent for doing nothing if there are large numbers of red-controlled nodes in the environment
if blue_action == "do_nothing":
reward = reward - (0.5 * final_cumulative_states)
vision_score = 0.01 * (final_cumulative_states - final_blue_states) ** 2
reward -= vision_score
return reward
# A very simple example reward function
def one_per_timestep(args: dict) -> float:
"""
Give a reward of 0.1 for every timestep that the blue agent is alive.
Args:
args: A dictionary containing the following items:
network_interface: Interface with the network
blue_action: The action that the blue agent has taken this turn
blue_node: The node that the blue agent has targeted for their action
start_state: The state of the nodes before the blue agent has taken their action
end_state: The state of the nodes after the blue agent has taken their action
start_vulnerabilities: The vulnerabilities before the blue agent's turn
end_vulnerabilities: The vulnerabilities after the blue agent's turn
start_isolation: The isolation status of all the nodes at the start of a turn
end_isolation: The isolation status of all the nodes at the end of a turn
start_blue: The environment as the blue agent sees it before the blue agent's turn
end_blue: The environment as the blue agent sees it after the blue agent's turn
Returns:
0.1
"""
return 0.1
def zero_reward(args: dict) -> float:
"""
Return zero reward per timestep.
Args:
args: A dictionary containing the following items:
network_interface: Interface with the network
blue_action: The action that the blue agent has taken this turn
blue_node: The node that the blue agent has targeted for their action
start_state: The state of the nodes before the blue agent has taken their action
end_state: The state of the nodes after the blue agent has taken their action
start_vulnerabilities: The vulnerabilities before the blue agent's turn
end_vulnerabilities: The vulnerabilities after the blue agent's turn
start_isolation: The isolation status of all the nodes at the start of a turn
end_isolation: The isolation status of all the nodes at the end of a turn
start_blue: The environment as the blue agent sees it before the blue agent's turn
end_blue: The environment as the blue agent sees it after the blue agent's turn
Returns:
0
"""
return 0
def safe_nodes_give_rewards(args: dict) -> float:
"""
Give 1 reward for every safe node at that timestep.
Args:
args: A dictionary containing the following items:
network_interface: Interface with the network
blue_action: The action that the blue agent has taken this turn
blue_node: The node that the blue agent has targeted for their action
start_state: The state of the nodes before the blue agent has taken their action
end_state: The state of the nodes after the blue agent has taken their action
start_vulnerabilities: The vulnerabilities before the blue agent's turn
end_vulnerabilities: The vulnerabilities after the blue agent's turn
start_isolation: The isolation status of all the nodes at the start of a turn
end_isolation: The isolation status of all the nodes at the end of a turn
start_blue: The environment as the blue agent sees it before the blue agent's turn
end_blue: The environment as the blue agent sees it after the blue agent's turn
Returns:
The reward earned for this specific turn for the blue agent
"""
# Get information about the current state of the environment
end_state = args["end_state"]
final_cumulative_states = sum(end_state.values())
# reward is equal to the number of safe nodes
reward = len(end_state) - final_cumulative_states
return reward
def punish_bad_actions(args: dict) -> float:
"""
Punishes the blue agent for bad actions (bad moves).
Args:
args: A dictionary containing the following items:
network_interface: Interface with the network
blue_action: The action that the blue agent has taken this turn
blue_node: The node that the blue agent has targeted for their action
start_state: The state of the nodes before the blue agent has taken their action
end_state: The state of the nodes after the blue agent has taken their action
start_vulnerabilities: The vulnerabilities before the blue agent's turn
end_vulnerabilities: The vulnerabilities after the blue agent's turn
start_isolation: The isolation status of all the nodes at the start of a turn
end_isolation: The isolation status of all the nodes at the end of a turn
start_blue: The environment as the blue agent sees it before the blue agent's turn
end_blue: The environment as the blue agent sees it after the blue agent's turn
Returns:
The reward earned for this specific turn for the blue agent
"""
# Get information about the current state of the game
network_interface: NetworkInterface = args["network_interface"]
blue_action = args["blue_action"]
start_state = args["start_state"]
end_state = args["end_state"]
start_vulnerabilities = args["start_vulnerabilities"]
end_vulnerabilities = args["end_vulnerabilities"]
# Get number of safe states before and after the blue agents turn
initial_cumulative_states = sum(start_state.values())
final_cumulative_states = sum(end_state.values())
reward = 0
# punish agent for doing nothing if there are large numbers of red-controlled nodes in the environment
if blue_action == "do_nothing":
reward = reward - (0.5 * final_cumulative_states)
# punish the blue agent for patching nodes that are already safe
if blue_action == "make_node_safe" or blue_action == "restore_node":
if initial_cumulative_states == final_cumulative_states:
reward = reward - (0.2 * initial_cumulative_states)
# punish the blue agent for reducing the vulnerability of a node that is already at minimum vulnerability
if blue_action == "reduce_vulnerability" and (
sum(start_vulnerabilities.values()) == sum(end_vulnerabilities.values())
):
reward = reward - 1
# punish for relocating deceptive nodes (after the maximum number have already been placed)
if blue_action == "add_deceptive_node":
if network_interface.reached_max_deceptive_nodes:
reward = reward - 5
return reward
def num_nodes_safe(args: dict) -> float:
"""
Provide reward based on the proportion of nodes safe within the environment.
Args:
args: A dictionary containing information from the
environment for the given timestep
Returns:
The calculated reward
"""
total_n_nodes = len(args["end_state"].values())
n_compromised = sum(args["end_state"].values())
n_safe = total_n_nodes - n_compromised
return n_safe / total_n_nodes
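# Worked example: with end_state = {"a": 0, "b": 0, "c": 0, "d": 1} (one
# compromised node out of four), num_nodes_safe returns 3 / 4 = 0.75.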
def dcbo_cost_func(args: dict) -> float:
"""
Calculate the cost function for DCBO using a set of fixed action cost values.
Args:
args: A dictionary containing the following items:
network_interface: Interface with the network
blue_action: The action that the blue agent has taken this turn
blue_node: The node that the blue agent has targeted for their action
start_state: The state of the nodes before the blue agent has taken their action
end_state: The state of the nodes after the blue agent has taken their action
start_vulnerabilities: The vulnerabilities before the blue agent's turn
end_vulnerabilities: The vulnerabilities after the blue agent's turn
start_isolation: The isolation status of all the nodes at the start of a turn
end_isolation: The isolation status of all the nodes at the end of a turn
start_blue: The environment as the blue agent sees it before the blue agent's turn
end_blue: The environment as the blue agent sees it after the blue agent's turn
Returns:
The cost for DCBO
"""
# Get information about the current state of the environment
blue_action = args["blue_action"]
end_state = args["end_state"]
# cost for actions
action_cost = {
"reduce_vulnerability": 0,
"restore_node": 1,
"make_node_safe": 1,
"scan": 0,
"isolate": 1,
"connect": 0,
"do_nothing": 0,
"add_deceptive_node": 0,
}
reward = action_cost[blue_action]
# calculating number of red nodes after the blue agent's turn
final_cumulative_states = sum(end_state.values())
cost = final_cumulative_states * 10 + reward
return 0 - cost
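# Illustrative usage sketch (assumption: the environment normally builds the
# args dictionary each turn; a hand-built dictionary with the documented keys
# is enough to exercise the simpler reward functions directly).
if __name__ == "__main__":
    example_args = {
        "blue_action": "make_node_safe",
        "end_state": {"node_0": 0, "node_1": 1, "node_2": 0},  # 1 = compromised
    }
    print(safe_nodes_give_rewards(example_args))  # 2 safe nodes -> 2
    print(num_nodes_safe(example_args))  # 2 of 3 nodes safe -> 0.666...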