Skip to content

Commit ad983e7

Browse files
committed
Improve the implementation of itertools.tee().
Formerly, the underlying queue was implemented in terms of two lists. The new queue is a series of singly-linked, fixed-length lists. The new implementation runs much faster, supports multi-way tees, and allows tees of tees without additional memory costs. The root ideas for this structure were contributed by Andrew Koenig and Guido van Rossum.
1 parent 767126d commit ad983e7

File tree

3 files changed

+242
-209
lines changed

3 files changed

+242
-209
lines changed

Doc/lib/libitertools.tex

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -281,9 +281,9 @@ \subsection{Itertool functions \label{itertools-functions}}
281281
\end{verbatim}
282282
\end{funcdesc}
283283

284-
\begin{funcdesc}{tee}{iterable}
285-
Return two independent iterators from a single iterable.
286-
Equivalent to:
284+
\begin{funcdesc}{tee}{iterable\optional{, n=2}}
285+
Return \var{n} independent iterators from a single iterable.
286+
The case where \var{n} is two is equivalent to:
287287

288288
\begin{verbatim}
289289
def tee(iterable):
@@ -299,6 +299,10 @@ \subsection{Itertool functions \label{itertools-functions}}
299299
return (gen(it.next), gen(it.next))
300300
\end{verbatim}
301301

302+
Note, once \function{tee()} has made a split, the original \var{iterable}
303+
should not be used anywhere else; otherwise, the \var{iterable} could get
304+
advanced without the tee objects being informed.
305+
302306
Note, this member of the toolkit may require significant auxiliary
303307
storage (depending on how much temporary data needs to be stored).
304308
In general, if one iterator is going to use most or all of the data before
@@ -408,6 +412,10 @@ \subsection{Examples \label{itertools-example}}
408412
def pairwise(iterable):
409413
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
410414
a, b = tee(iterable)
411-
return izip(a, islice(b, 1, None))
415+
try:
416+
b.next()
417+
except StopIteration:
418+
pass
419+
return izip(a, b)
412420
413421
\end{verbatim}

Lib/test/test_itertools.py

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def test_dropwhile(self):
200200
self.assertRaises(ValueError, dropwhile(errfunc, [(4,5)]).next)
201201

202202
def test_tee(self):
203-
n = 100
203+
n = 200
204204
def irange(n):
205205
for i in xrange(n):
206206
yield i
@@ -217,16 +217,16 @@ def irange(n):
217217
self.assertEqual(list(b), range(n))
218218

219219
a, b = tee(irange(n)) # test dealloc of leading iterator
220-
self.assertEqual(a.next(), 0)
221-
self.assertEqual(a.next(), 1)
220+
for i in xrange(100):
221+
self.assertEqual(a.next(), i)
222222
del a
223223
self.assertEqual(list(b), range(n))
224224

225225
a, b = tee(irange(n)) # test dealloc of trailing iterator
226-
self.assertEqual(a.next(), 0)
227-
self.assertEqual(a.next(), 1)
226+
for i in xrange(100):
227+
self.assertEqual(a.next(), i)
228228
del b
229-
self.assertEqual(list(a), range(2, n))
229+
self.assertEqual(list(a), range(100, n))
230230

231231
for j in xrange(5): # test randomly interleaved
232232
order = [0]*n + [1]*n
@@ -239,21 +239,31 @@ def irange(n):
239239
self.assertEqual(lists[0], range(n))
240240
self.assertEqual(lists[1], range(n))
241241

242+
# test argument format checking
242243
self.assertRaises(TypeError, tee)
243244
self.assertRaises(TypeError, tee, 3)
244245
self.assertRaises(TypeError, tee, [1,2], 'x')
246+
self.assertRaises(TypeError, tee, [1,2], 3, 'x')
245247

246-
try:
247-
class A(tee): pass
248-
except TypeError:
249-
pass
250-
else:
251-
self.fail("tee constructor should not be subclassable")
248+
# tee object should be instantiable
249+
a, b = tee('abc')
250+
c = type(a)('def')
251+
self.assertEqual(list(c), list('def'))
252+
253+
# test long-lagged and multi-way split
254+
a, b, c = tee(xrange(2000), 3)
255+
for i in xrange(100):
256+
self.assertEqual(a.next(), i)
257+
self.assertEqual(list(b), range(2000))
258+
self.assertEqual([c.next(), c.next()], range(2))
259+
self.assertEqual(list(a), range(100,2000))
260+
self.assertEqual(list(c), range(2,2000))
261+
262+
# tee pass-through to copyable iterator
263+
a, b = tee('abc')
264+
c, d = tee(a)
265+
self.assert_(a is c)
252266

253-
# tee_iterator should not be instantiable
254-
a, b = tee(xrange(10))
255-
self.assertRaises(TypeError, type(a))
256-
self.assert_(a is iter(a)) # tee_iterator should support __iter__
257267

258268
def test_StopIteration(self):
259269
self.assertRaises(StopIteration, izip().next)
@@ -317,13 +327,6 @@ def test_starmap(self):
317327
a = []
318328
self.makecycle(starmap(lambda *t: t, [(a,a)]*2), a)
319329

320-
def test_tee(self):
321-
a = []
322-
p, q = t = tee([a]*2)
323-
a += [a, p, q, t]
324-
p.next()
325-
del a, p, q, t
326-
327330
def R(seqn):
328331
'Regular generator'
329332
for i in seqn:
@@ -626,7 +629,11 @@ def f(t):
626629
>>> def pairwise(iterable):
627630
... "s -> (s0,s1), (s1,s2), (s2, s3), ..."
628631
... a, b = tee(iterable)
629-
... return izip(a, islice(b, 1, None))
632+
... try:
633+
... b.next()
634+
... except StopIteration:
635+
... pass
636+
... return izip(a, b)
630637
631638
This is not part of the examples but it tests to make sure the definitions
632639
perform as purported.

0 commit comments

Comments
 (0)