
Commit 1a7688b

Merge pull request #1215 from Axelrod-Python/adaptor
New Strategy: Implementation of Adaptor
2 parents 8dba4c0 + ae51ba4 commit 1a7688b

10 files changed: 213 additions and 8 deletions

axelrod/game.py

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ def RPST(self) -> Tuple[Score, Score, Score, Score]:
         P = self.scores[(D, D)][0]
         S = self.scores[(C, D)][0]
         T = self.scores[(D, C)][0]
-        return (R, P, S, T)
+        return R, P, S, T

     def score(self, pair: Tuple[Action, Action]) -> Tuple[Score, Score]:
         """Returns the appropriate score for a decision pair.

axelrod/strategies/_strategies.py

Lines changed: 3 additions & 0 deletions
@@ -1,4 +1,5 @@
 from .adaptive import Adaptive
+from .adaptor import AdaptorBrief, AdaptorLong
 from .alternator import Alternator
 from .ann import EvolvedANN, EvolvedANN5, EvolvedANNNoise05
 from .apavlov import APavlov2006, APavlov2011
@@ -230,6 +231,8 @@
 all_strategies = [
     Adaptive,
     AdaptiveTitForTat,
+    AdaptorBrief,
+    AdaptorLong,
     Aggravater,
     Alexei,
     ALLCorALLD,

axelrod/strategies/adaptor.py

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
+from typing import Dict, Tuple
+
+from axelrod.action import Action
+from axelrod.player import Player
+from axelrod.random_ import random_choice
+
+from numpy import heaviside
+
+C, D = Action.C, Action.D
+
+
+class AbstractAdaptor(Player):
+    """
+    An adaptive strategy that updates an internal state based on the last
+    round of play. Using this state the player Cooperates with a probability
+    derived from the state.
+
+    s, float:
+        the internal state, initially 0
+    perr, float:
+        an error threshold for misinterpreted moves
+    delta, a dictionary of floats:
+        additive update values for s depending on the last round's outcome
+
+    Names:
+
+    - Adaptor: [Hauert2002]_
+
+    """
+
+    name = "AbstractAdaptor"
+    classifier = {
+        "memory_depth": float("inf"),  # Long memory
+        "stochastic": True,
+        "makes_use_of": set(),
+        "long_run_time": False,
+        "inspects_source": False,
+        "manipulates_source": False,
+        "manipulates_state": False,
+    }
+
+    def __init__(self, delta: Dict[Tuple[Action, Action], float],
+                 perr: float = 0.01) -> None:
+        super().__init__()
+        self.perr = perr
+        self.delta = delta
+        self.s = 0.
+
+    def strategy(self, opponent: Player) -> Action:
+        if self.history:
+            # Update internal state from the last play
+            last_round = (self.history[-1], opponent.history[-1])
+            self.s += self.delta[last_round]
+
+        # Compute probability of Cooperation
+        p = self.perr + (1.0 - 2 * self.perr) * (
+            heaviside(self.s + 1, 1) - heaviside(self.s - 1, 1))
+        # Draw action
+        action = random_choice(p)
+        return action
+
+
+class AdaptorBrief(AbstractAdaptor):
+    """
+    An Adaptor trained on short interactions.
+
+    Names:
+
+    - AdaptorBrief: [Hauert2002]_
+
+    """
+
+    name = "AdaptorBrief"
+
+    def __init__(self) -> None:
+        delta = {
+            (C, C): 0.,  # R
+            (C, D): -1.001505,  # S
+            (D, C): 0.992107,  # T
+            (D, D): -0.638734  # P
+        }
+        super().__init__(delta=delta)
+
+
+class AdaptorLong(AbstractAdaptor):
+    """
+    An Adaptor trained on long interactions.
+
+    Names:
+
+    - AdaptorLong: [Hauert2002]_
+
+    """
+
+    name = "AdaptorLong"
+
+    def __init__(self) -> None:
+        delta = {
+            (C, C): 0.,  # R
+            (C, D): 1.888159,  # S
+            (D, C): 1.858883,  # T
+            (D, D): -0.995703  # P
+        }
+        super().__init__(delta=delta)
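
The probability computation in strategy() is compact enough to be easy to misread. numpy's heaviside(x, 1) returns 0 for x < 0 and 1 for x >= 0, so the bracketed difference equals 1 exactly while -1 <= s < 1: the player cooperates with probability 1 - perr inside that window and with probability perr once accumulated updates push the state out of it. A minimal standalone sketch of just this formula, separate from the diff above:

    # Sketch of the cooperation probability used by AbstractAdaptor.strategy.
    # Standalone for illustration; only numpy is required.
    from numpy import heaviside

    def cooperation_probability(s: float, perr: float = 0.01) -> float:
        # heaviside(x, 1) is 0 for x < 0 and 1 for x >= 0, so the
        # difference below is 1 exactly when -1 <= s < 1, else 0.
        return perr + (1.0 - 2 * perr) * (
            heaviside(s + 1, 1) - heaviside(s - 1, 1)
        )

    for s in (-2.0, -1.0, 0.0, 0.5, 1.0, 2.0):
        print(s, cooperation_probability(s))
    # prints 0.01, 0.99, 0.99, 0.99, 0.01, 0.01 respectively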

axelrod/strategies/bush_mosteller.py

Lines changed: 4 additions & 4 deletions
@@ -51,15 +51,15 @@ def __init__(
         aspiration_level_divider: float, 3.0
             Value that regulates the aspiration level,
             isn't modified during match
-        learning rate [0 , 1]
-            Percentage of learning speed
+        learning rate [0 , 1]
+            Percentage of learning speed
         Variables / Constants
-        _stimulus (Var: [-1 , 1]): float
+        stimulus (Var: [-1 , 1]): float
             Value that impacts the changes of action probability
         _aspiration_level: float
             Value that impacts the stimulus changes, isn't modified during match
         _init_c_prob , _init_d_prob : float
-            Values used to properly set up reset(),
+            Values used to properly set up reset(),
             set to original probabilities
         """
         super().__init__()
axelrod/tests/strategies/test_adaptor.py

Lines changed: 94 additions & 0 deletions

@@ -0,0 +1,94 @@
+"""Tests for the adaptor"""
+
+import unittest
+
+import axelrod
+from axelrod import Game
+
+from .test_player import TestPlayer, test_four_vector
+
+C, D = axelrod.Action.C, axelrod.Action.D
+
+
+class TestAdaptorBrief(TestPlayer):
+
+    name = "AdaptorBrief"
+    player = axelrod.AdaptorBrief
+    expected_classifier = {
+        "memory_depth": float("inf"),
+        "stochastic": True,
+        "makes_use_of": set(),
+        "inspects_source": False,
+        "manipulates_source": False,
+        "manipulates_state": False,
+    }
+
+    def test_strategy(self):
+        # No error.
+        actions = [(C, C), (C, C), (C, C), (C, C)]
+        self.versus_test(
+            opponent=axelrod.AdaptorBrief(), expected_actions=actions, seed=0
+        )
+
+        # Error corrected.
+        actions = [(C, C), (C, D), (D, C), (C, C)]
+        self.versus_test(
+            opponent=axelrod.AdaptorBrief(), expected_actions=actions, seed=22
+        )
+
+        # Error corrected, example 2
+        actions = [(D, C), (C, D), (D, C), (C, D), (C, C)]
+        self.versus_test(
+            opponent=axelrod.AdaptorBrief(), expected_actions=actions, seed=925
+        )
+
+        # Versus Cooperator
+        actions = [(C, C)] * 8
+        self.versus_test(
+            opponent=axelrod.Cooperator(), expected_actions=actions, seed=0
+        )
+
+        # Versus Defector
+        actions = [(C, D), (D, D), (D, D), (D, D), (D, D), (D, D), (D, D)]
+        self.versus_test(
+            opponent=axelrod.Defector(), expected_actions=actions, seed=0
+        )
+
+
+class TestAdaptorLong(TestPlayer):
+
+    name = "AdaptorLong"
+    player = axelrod.AdaptorLong
+    expected_classifier = {
+        "memory_depth": float("inf"),
+        "stochastic": True,
+        "makes_use_of": set(),
+        "inspects_source": False,
+        "manipulates_source": False,
+        "manipulates_state": False,
+    }
+
+    def test_strategy(self):
+        # No error.
+        actions = [(C, C), (C, C), (C, C), (C, C)]
+        self.versus_test(
+            opponent=axelrod.AdaptorLong(), expected_actions=actions, seed=0
+        )
+
+        # Error corrected.
+        actions = [(C, C), (C, D), (D, D), (C, C), (C, C)]
+        self.versus_test(
+            opponent=axelrod.AdaptorLong(), expected_actions=actions, seed=22
+        )
+
+        # Versus Cooperator
+        actions = [(C, C)] * 8
+        self.versus_test(
+            opponent=axelrod.Cooperator(), expected_actions=actions, seed=0
+        )
+
+        # Versus Defector
+        actions = [(C, D), (D, D), (C, D), (D, D), (D, D), (C, D), (D, D)]
+        self.versus_test(
+            opponent=axelrod.Defector(), expected_actions=actions, seed=0
+        )
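
Outside the test harness, the behaviour pinned down above can be reproduced roughly as follows. This is a sketch assuming the library's API at the time of this commit (module-level axl.seed plus the Match class); the exact action sequence depends on the seed:

    import axelrod as axl

    axl.seed(0)  # module-level seeding, as the library used at this time
    players = (axl.AdaptorBrief(), axl.Defector())
    match = axl.Match(players, turns=7)
    print(match.play())  # a list of action pairs, e.g. [(C, D), (D, D), ...]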

axelrod/tests/strategies/test_memoryone.py

Lines changed: 2 additions & 1 deletion
@@ -71,7 +71,8 @@ class TestWinShiftLoseStayTestPlayer(TestPlayer):
     def test_strategy(self):
         # Check that switches if does not get best payoff.
         actions = [(D, C), (C, D), (C, C), (D, D), (D, C)]
-        self.versus_test(opponent=axelrod.Alternator(), expected_actions=actions)
+        self.versus_test(opponent=axelrod.Alternator(),
+                         expected_actions=actions)


 class TestGTFT(TestPlayer):

axelrod/tests/strategies/test_meta.py

Lines changed: 1 addition & 1 deletion
@@ -548,7 +548,7 @@ class TestNMWEStochastic(TestMetaPlayer):
     }

     def test_strategy(self):
-        actions = [(C, C), (C, D), (D, C), (D, D), (D, C)]
+        actions = [(C, C), (C, D), (C, C), (D, D), (D, C)]
         self.versus_test(opponent=axelrod.Alternator(), expected_actions=actions)

docs/reference/all_strategies.rst

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,8 @@ Here are the docstrings of all the strategies in the library.

 .. automodule:: axelrod.strategies.adaptive
    :members:
+.. automodule:: axelrod.strategies.adaptor
+   :members:
 .. automodule:: axelrod.strategies.alternator
    :members:
 .. automodule:: axelrod.strategies.ann

docs/reference/bibliography.rst

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ documentation.
 .. [Berg2015] Berg, P. Van Den, & Weissing, F. J. (2015). The importance of mechanisms for the evolution of cooperation. Proceedings of the Royal Society B-Biological Sciences, 282.
 .. [Eckhart2015] Eckhart Arnold (2016) CoopSim v0.9.9 beta 6. https://github.com/jecki/CoopSim/
 .. [Frean1994] Frean, Marcus R. "The Prisoner's Dilemma without Synchrony." Proceedings: Biological Sciences, vol. 257, no. 1348, 1994, pp. 75–79. www.jstor.org/stable/50253.
+.. [Hauert2002] Hauert, Christoph, and Olaf Stenull. "Simple adaptive strategy wins the prisoner's dilemma." Journal of Theoretical Biology 218.3 (2002): 261-272.
 .. [Hilbe2013] Hilbe, C., Nowak, M.A. and Traulsen, A. (2013). Adaptive dynamics of extortion and compliance, PLoS ONE, 8(11), p. e77886. doi: 10.1371/journal.pone.0077886.
 .. [Hilbe2017] Hilbe, C., Martinez-Vaquero, L. A., Chatterjee K., Nowak M. A. (2017). Memory-n strategies of direct reciprocity, Proceedings of the National Academy of Sciences May 2017, 114 (18) 4715-4720; doi: 10.1073/pnas.1621239114.
 .. [Kuhn2017] Kuhn, Steven, "Prisoner's Dilemma", The Stanford Encyclopedia of Philosophy (Spring 2017 Edition), Edward N. Zalta (ed.), https://plato.stanford.edu/archives/spr2017/entries/prisoner-dilemma/

docs/tutorials/advanced/classification_of_strategies.rst

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ strategies::
     ... }
     >>> strategies = axl.filtered_strategies(filterset)
     >>> len(strategies)
-    82
+    84

 Or, to find out how many strategies only use 1 turn worth of memory to
 make a decision::
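
The expected count rises from 82 to 84 because the two newly registered strategies evidently satisfy the filterset, whose keys are elided in this hunk ("... }"). As a sketch of the filtering API only, with illustrative keys (both new strategies are stochastic with infinite memory, so a filterset like this would match them):

    import axelrod as axl

    # Illustrative filterset; the actual keys at this point in the docs
    # are not shown in the hunk above.
    filterset = {
        "stochastic": True,
        "memory_depth": float("inf"),
    }
    strategies = axl.filtered_strategies(filterset)
    print(len(strategies))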
