game: sheriff

GameType.chance_mode = ChanceMode.DETERMINISTIC
GameType.dynamics = Dynamics.SEQUENTIAL
GameType.information = Information.IMPERFECT_INFORMATION
GameType.long_name = "Sheriff"
GameType.max_num_players = 2
GameType.min_num_players = 2
GameType.parameter_specification = ["item_penalty", "item_value", "max_bribe", "max_items", "num_rounds", "sheriff_penalty"]
GameType.provides_information_state_string = True
GameType.provides_information_state_tensor = False
GameType.provides_observation_string = False
GameType.provides_observation_tensor = False
GameType.provides_factored_observation_string = False
GameType.reward_model = RewardModel.TERMINAL
GameType.short_name = "sheriff"
GameType.utility = Utility.GENERAL_SUM

NumDistinctActions() = 10
PolicyTensorShape() = [10]
MaxChanceOutcomes() = 0
GetParameters() = {item_penalty=2.0,item_value=1.0,max_bribe=3,max_items=3,num_rounds=4,sheriff_penalty=3.0}
NumPlayers() = 2
MinUtility() = -6.0
MaxUtility() = 6.0
UtilitySum() = None
MaxGameLength() = 9
ToString() = "sheriff()"

# State 0
# Initial game state (smuggler hasn't decided the number of illegal cargo items yet)
IsTerminal() = False
History() = []
HistoryString() = ""
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "T=0 num_illegal_items:none"
InformationStateString(1) = "T=0 "
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [2, 3, 4, 5]
StringLegalActions() = ["PlaceIllegalItems(num=0)", "PlaceIllegalItems(num=1)", "PlaceIllegalItems(num=2)", "PlaceIllegalItems(num=3)"]

# Apply action "PlaceIllegalItems(num=1)"
action: 3

# State 1
# Num illegal items in cargo: 1
# Bribes  : []
# Feedback: []
IsTerminal() = False
History() = [3]
HistoryString() = "3"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "T=1 num_illegal_items:1"
InformationStateString(1) = "T=1 "
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [6, 7, 8, 9]
StringLegalActions() = ["Bribe(amount=0)", "Bribe(amount=1)", "Bribe(amount=2)", "Bribe(amount=3)"]

# Apply action "Bribe(amount=3)"
action: 9

# State 2
# Num illegal items in cargo: 1
# Bribes  : [3]
# Feedback: []
IsTerminal() = False
History() = [3, 9]
HistoryString() = "3, 9"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "T=2 num_illegal_items:1/bribe:3"
InformationStateString(1) = "T=2 /bribe:3"
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [0, 1]
StringLegalActions() = ["InspectionFeedback(will_inspect=False)", "InspectionFeedback(will_inspect=True)"]

# Apply action "InspectionFeedback(will_inspect=True)"
action: 1

# State 3
# Num illegal items in cargo: 1
# Bribes  : [3]
# Feedback: [1]
IsTerminal() = False
History() = [3, 9, 1]
HistoryString() = "3, 9, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "T=3 num_illegal_items:1/bribe:3/feedback:1"
InformationStateString(1) = "T=3 /bribe:3/feedback:1"
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [6, 7, 8, 9]
StringLegalActions() = ["Bribe(amount=0)", "Bribe(amount=1)", "Bribe(amount=2)", "Bribe(amount=3)"]

# Apply action "Bribe(amount=3)"
action: 9

# State 4
# Num illegal items in cargo: 1
# Bribes  : [3,3]
# Feedback: [1]
IsTerminal() = False
History() = [3, 9, 1, 9]
HistoryString() = "3, 9, 1, 9"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "T=4 num_illegal_items:1/bribe:3/feedback:1/bribe:3"
InformationStateString(1) = "T=4 /bribe:3/feedback:1/bribe:3"
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [0, 1]
StringLegalActions() = ["InspectionFeedback(will_inspect=False)", "InspectionFeedback(will_inspect=True)"]

# Apply action "InspectionFeedback(will_inspect=False)"
action: 0

# State 5
# Num illegal items in cargo: 1
# Bribes  : [3,3]
# Feedback: [1,0]
IsTerminal() = False
History() = [3, 9, 1, 9, 0]
HistoryString() = "3, 9, 1, 9, 0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "T=5 num_illegal_items:1/bribe:3/feedback:1/bribe:3/feedback:0"
InformationStateString(1) = "T=5 /bribe:3/feedback:1/bribe:3/feedback:0"
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [6, 7, 8, 9]
StringLegalActions() = ["Bribe(amount=0)", "Bribe(amount=1)", "Bribe(amount=2)", "Bribe(amount=3)"]

# Apply action "Bribe(amount=2)"
action: 8

# State 6
# Num illegal items in cargo: 1
# Bribes  : [3,3,2]
# Feedback: [1,0]
IsTerminal() = False
History() = [3, 9, 1, 9, 0, 8]
HistoryString() = "3, 9, 1, 9, 0, 8"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "T=6 num_illegal_items:1/bribe:3/feedback:1/bribe:3/feedback:0/bribe:2"
InformationStateString(1) = "T=6 /bribe:3/feedback:1/bribe:3/feedback:0/bribe:2"
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [0, 1]
StringLegalActions() = ["InspectionFeedback(will_inspect=False)", "InspectionFeedback(will_inspect=True)"]

# Apply action "InspectionFeedback(will_inspect=False)"
action: 0

# State 7
# Num illegal items in cargo: 1
# Bribes  : [3,3,2]
# Feedback: [1,0,0]
IsTerminal() = False
History() = [3, 9, 1, 9, 0, 8, 0]
HistoryString() = "3, 9, 1, 9, 0, 8, 0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "T=7 num_illegal_items:1/bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0"
InformationStateString(1) = "T=7 /bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0"
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [6, 7, 8, 9]
StringLegalActions() = ["Bribe(amount=0)", "Bribe(amount=1)", "Bribe(amount=2)", "Bribe(amount=3)"]

# Apply action "Bribe(amount=3)"
action: 9

# State 8
# Num illegal items in cargo: 1
# Bribes  : [3,3,2,3]
# Feedback: [1,0,0]
IsTerminal() = False
History() = [3, 9, 1, 9, 0, 8, 0, 9]
HistoryString() = "3, 9, 1, 9, 0, 8, 0, 9"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "T=8 num_illegal_items:1/bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0/bribe:3"
InformationStateString(1) = "T=8 /bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0/bribe:3"
Rewards() = [0.0, 0.0]
Returns() = [0.0, 0.0]
LegalActions() = [0, 1]
StringLegalActions() = ["InspectionFeedback(will_inspect=False)", "InspectionFeedback(will_inspect=True)"]

# Apply action "InspectionFeedback(will_inspect=True)"
action: 1

# State 9
# Num illegal items in cargo: 1
# Bribes  : [3,3,2,3]
# Feedback: [1,0,0,1]
IsTerminal() = True
History() = [3, 9, 1, 9, 0, 8, 0, 9, 1]
HistoryString() = "3, 9, 1, 9, 0, 8, 0, 9, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = -4
InformationStateString(0) = "T=9 num_illegal_items:1/bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0/bribe:3/feedback:1"
InformationStateString(1) = "T=9 /bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0/bribe:3/feedback:1"
Rewards() = [-2.0, 2.0]
Returns() = [-2.0, 2.0]
