Adding soft grudger and reverse pavlov (win shift lose stay) #628

Merged: 5 commits, Jun 10, 2016
Changes from all commits
9 changes: 6 additions & 3 deletions axelrod/strategies/_strategies.py
@@ -27,7 +27,8 @@
    GoByMajority5,
    HardGoByMajority, HardGoByMajority10, HardGoByMajority20, HardGoByMajority40,
    HardGoByMajority5)
from .grudger import Grudger, ForgetfulGrudger, OppositeGrudger, Aggravater
from .grudger import (Grudger, ForgetfulGrudger, OppositeGrudger, Aggravater,
                      SoftGrudger)
from .grumpy import Grumpy
from .handshake import Handshake
from .hunter import (
@@ -39,7 +40,7 @@
from .memoryone import (
    MemoryOnePlayer, ALLCorALLD, FirmButFair, GTFT, SoftJoss,
    StochasticCooperator, StochasticWSLS, ZDExtort2, ZDExtort2v2, ZDExtort4,
    ZDGen2, ZDGTFT2, ZDSet2, WinStayLoseShift)
    ZDGen2, ZDGTFT2, ZDSet2, WinStayLoseShift, WinShiftLoseStay)
from .mindcontrol import MindController, MindWarper, MindBender
from .mindreader import MindReader, ProtectedMindReader, MirrorMindReader
from .oncebitten import OnceBitten, FoolMeOnce, ForgetfulFoolMeOnce, FoolMeForever
@@ -162,6 +163,7 @@
    RiskyQLearner,
    Shubik,
    SneakyTitForTat,
    SoftGrudger,
    SoftJoss,
    SolutionB1,
    SolutionB5,
@@ -177,12 +179,13 @@
    TrickyDefector,
    Tullock,
    TwoTitsForTat,
    WinShiftLoseStay,
    WinStayLoseShift,
    ZDExtort2,
    ZDExtort2v2,
    ZDExtort4,
    ZDGen2,
    ZDGTFT2,
    ZDGen2,
    ZDSet2,
    e,
]
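
Note (illustrative, not part of the diff): once `SoftGrudger` and `WinShiftLoseStay` are imported and listed in `_strategies.py` as above, they should appear in the top-level `axelrod.strategies` list that the documentation change below counts over. A minimal sketch of that check, assuming the package is importable as `axl` like in the docs:

```python
# Sketch only: confirms the two new classes are picked up by the
# axelrod.strategies list once registered in _strategies.py.
import axelrod as axl

print(axl.SoftGrudger in axl.strategies)       # expected: True
print(axl.WinShiftLoseStay in axl.strategies)  # expected: True
```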
50 changes: 50 additions & 0 deletions axelrod/strategies/grudger.py
@@ -106,3 +106,53 @@ def strategy(opponent):
        elif opponent.defections:
            return D
        return C


class SoftGrudger(Player):
    """
    A modification of the Grudger strategy. Instead of punishing by always
    defecting, it punishes by playing D, D, D, D, C, C in response to a
    defection, then resumes cooperating.

    For reference see: "Engineering Design of Strategies for Winning
    Iterated Prisoner's Dilemma Competitions" by Jiawei Li, Philip Hingston,
    and Graham Kendall. IEEE TRANSACTIONS ON COMPUTATIONAL INTELLIGENCE AND AI
    IN GAMES, VOL. 3, NO. 4, DECEMBER 2011
    """

    name = 'Soft Grudger'
    classifier = {
        'memory_depth': 6,
        'stochastic': False,
        'makes_use_of': set(),
        'inspects_source': False,
        'manipulates_source': False,
        'manipulates_state': False
    }

    def __init__(self):
        """Initialise the player."""
        super(SoftGrudger, self).__init__()
        self.grudged = False
        self.grudge_memory = 0

    def strategy(self, opponent):
        """Begins by playing C, then plays D, D, D, D, C, C against a defection
        """
        if self.grudged:
            strategy = [D, D, D, C, C][self.grudge_memory]
            self.grudge_memory += 1
            if self.grudge_memory == 5:
                self.grudge_memory = 0
                self.grudged = False
            return strategy
        elif D in opponent.history[-1:]:
            self.grudged = True
            return D
        return C

    def reset(self):
        """Resets scores and history."""
        Player.reset(self)
        self.grudged = False
        self.grudge_memory = 0
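
As an illustration of the punishment cycle (not part of the diff), here is a short sketch pitting `SoftGrudger` against a constant defector, using the same `Match` API that the `test_titfortat.py` change below relies on; the expected move sequence in the comment follows directly from the strategy code above:

```python
# Sketch only: SoftGrudger versus a constant defector over 8 turns.
# A defection triggers the D, D, D, D, C, C punishment cycle, after which
# the next defection starts a new cycle.
import axelrod

player = axelrod.SoftGrudger()
opponent = axelrod.Defector()
match = axelrod.Match((player, opponent), 8)
interactions = match.play()

# SoftGrudger's side of each round; expected: C, D, D, D, D, C, C, D
print([round_plays[0] for round_plays in interactions])
```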
26 changes: 26 additions & 0 deletions axelrod/strategies/memoryone.py
@@ -88,6 +88,32 @@ def __init__(self, initial=C):
        self._initial = initial


class WinShiftLoseStay(MemoryOnePlayer):
    """Win-Shift Lose-Stay, also called Reverse Pavlov.

    For reference see: "Engineering Design of Strategies for Winning
    Iterated Prisoner's Dilemma Competitions" by Jiawei Li, Philip Hingston,
    and Graham Kendall. IEEE TRANSACTIONS ON COMPUTATIONAL INTELLIGENCE AND AI
    IN GAMES, VOL. 3, NO. 4, DECEMBER 2011
    """

    name = 'Win-Shift Lose-Stay'
    classifier = {
        'memory_depth': 1,  # Memory-one Four-Vector
        'stochastic': False,
        'makes_use_of': set(),
        'inspects_source': False,
        'manipulates_source': False,
        'manipulates_state': False
    }

    @init_args
    def __init__(self, initial=D):
        Player.__init__(self)
        self.set_four_vector([0, 1, 1, 0])
        self._initial = initial


class GTFT(MemoryOnePlayer):
    """Generous Tit-For-Tat Strategy."""

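A note on the four-vector for reviewers (illustrative, not taken from the library internals): the list passed to `set_four_vector` gives the probability of cooperating after each previous-round outcome (C, C), (C, D), (D, C), (D, D), read as (own move, opponent's move). So [0, 1, 1, 0] is the exact reverse of Win-Stay Lose-Shift's [1, 0, 0, 1], which the `markov_test` below exercises:

```python
# Sketch only: hand-built lookup tables showing how the four-vector is read.
# Values are probabilities of cooperating after each previous-round outcome,
# ordered (C, C), (C, D), (D, C), (D, D) as (own move, opponent's move).
outcomes = [('C', 'C'), ('C', 'D'), ('D', 'C'), ('D', 'D')]

win_stay_lose_shift = dict(zip(outcomes, [1, 0, 0, 1]))
win_shift_lose_stay = dict(zip(outcomes, [0, 1, 1, 0]))

for outcome in outcomes:
    print(outcome,
          "WSLS ->", 'C' if win_stay_lose_shift[outcome] else 'D',
          "| Reverse Pavlov ->", 'C' if win_shift_lose_stay[outcome] else 'D')
```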
45 changes: 45 additions & 0 deletions axelrod/tests/unit/test_grudger.py
@@ -125,3 +125,48 @@ def test_strategy(self):
"""
self.responses_test([C, D, D, D], [C, C, C, C], [C])
self.responses_test([C, C, D, D, D], [C, D, C, C, C], [D])


class TestSoftGrudger(TestPlayer):

    name = "Soft Grudger"
    player = axelrod.SoftGrudger
    expected_classifier = {
        'memory_depth': 6,
        'stochastic': False,
        'makes_use_of': set(),
        'inspects_source': False,
        'manipulates_source': False,
        'manipulates_state': False
    }

    def test_initial_strategy(self):
        """
        Starts by cooperating
        """
        self.first_play_test(C)

    def test_strategy(self):
        """
        If the opponent defects at any point then the player responds with
        D, D, D, D, C, C
        """
        self.responses_test([C], [C], [C])
        self.responses_test([C, C], [C, D], [D])
        self.responses_test([C, C, D], [C, D, C], [D])
        self.responses_test([C, C, D, D], [C, D, C, C], [D])
        self.responses_test([C, C, D, D, D], [C, D, C, C, C], [D])
        self.responses_test([C, C, D, D, D, D], [C, D, C, C, C, C], [C])
        self.responses_test([C, C, D, D, D, D, C], [C, D, C, C, C, C, C], [C])
        self.responses_test([C, C, D, D, D, D, C, C],
                            [C, D, C, C, C, C, C, D], [D])
        self.responses_test([C, C, D, D, D, D, C, C, D],
                            [C, D, C, C, C, C, C, D, C], [D])

    def test_reset(self):
        p = axelrod.SoftGrudger()
        p.grudged = True
        p.grudge_memory = 5
        p.reset()
        self.assertFalse(p.grudged)
        self.assertEqual(p.grudge_memory, 0)
22 changes: 22 additions & 0 deletions axelrod/tests/unit/test_memoryone.py
@@ -36,6 +36,28 @@ def test_effect_of_strategy(self):
        self.markov_test([C, D, D, C])


class TestWinShiftLoseStayTestPlayer(TestPlayer):

    name = "Win-Shift Lose-Stay"
    player = axelrod.WinShiftLoseStay
    expected_classifier = {
        'memory_depth': 1,
        'stochastic': False,
        'makes_use_of': set(),
        'inspects_source': False,
        'manipulates_source': False,
        'manipulates_state': False
    }

    def test_strategy(self):
        """Starts by defecting"""
        self.first_play_test(D)

    def test_effect_of_strategy(self):
        """Check that it switches if it does not get the best payoff."""
        self.markov_test([D, C, C, D])


class TestGTFT(TestPlayer):

    name = "GTFT: 0.33"
23 changes: 23 additions & 0 deletions axelrod/tests/unit/test_titfortat.py
@@ -339,3 +339,26 @@ def test_reset_cleans_all(self):
        self.assertFalse(p.punishing)
        self.assertEqual(p.punishment_count, 0)
        self.assertEqual(p.punishment_limit, 0)

    def test_output_from_literature(self):
        """
        This strategy is not fully described in the literature; however, the
        following two results are reported in:

        Bruno Beaufils, Jean-Paul Delahaye, Philippe Mathieu
        "Our Meeting With Gradual: A Good Strategy For The Iterated Prisoner's
        Dilemma" Proc. Artif. Life 1996

        This test just ensures that the strategy behaves as originally defined.
        """
        player = axelrod.Gradual()

        opp1 = axelrod.Defector()
        match = axelrod.Match((player, opp1), 1000)
        match.play()
        self.assertEqual(match.final_score(), (915, 1340))

        opp2 = axelrod.CyclerCCD()
        match = axelrod.Match((player, opp2), 1000)
        match.play()
        self.assertEqual(match.final_score(), (3472, 767))
2 changes: 1 addition & 1 deletion docs/tutorials/advanced/classification_of_strategies.rst
@@ -30,7 +30,7 @@ Or indeed find out how many strategies use only 1 turn worth of memory to
make a decision::

    >>> len([s for s in axl.strategies if s().classifier['memory_depth']==1])
    19
    20

We can also identify strategies that make use of particular properties of the
tournament. For example, here is the number of strategies that make use of the