Skip to content

Commit 664bccf

Browse files
author
y-p
committed
Merge pull request #6226 from y-p/PR_clip_excel
ENH: pd.read_clipboard detects tab-separated data (excel) GH6223
2 parents 8569378 + 7e59d62 commit 664bccf

File tree

4 files changed

+62
-3
lines changed

4 files changed

+62
-3
lines changed

doc/source/release.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,16 @@ New features
5656
API Changes
5757
~~~~~~~~~~~
5858

59+
5960
Experimental Features
6061
~~~~~~~~~~~~~~~~~~~~~
6162

6263
Improvements to existing features
6364
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6465

66+
- pd.read_clipboard will, if 'sep' is unspecified, try to detect data copied from a spreadsheet
67+
and parse accordingly. (:issue:`6223`)
68+
6569
.. _release.bug_fixes-0.14.0:
6670

6771
Bug Fixes

doc/source/v0.14.0.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,13 @@ There are no deprecations of prior behavior in 0.14.0
2828
Enhancements
2929
~~~~~~~~~~~~
3030

31+
- pd.read_clipboard will, if 'sep' is unspecified, try to detect data copied from a spreadsheet
32+
and parse accordingly. (:issue:`6223`)
33+
34+
3135
Performance
3236
~~~~~~~~~~~
3337

34-
3538
Experimental
3639
~~~~~~~~~~~~
3740

pandas/io/clipboard.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,29 @@ def read_clipboard(**kwargs): # pragma: no cover
1414
-------
1515
parsed : DataFrame
1616
"""
17-
if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
18-
kwargs['sep'] = '\s+'
1917
from pandas.util.clipboard import clipboard_get
2018
from pandas.io.parsers import read_table
2119
text = clipboard_get()
2220

21+
# Excel copies into clipboard with \t separation
22+
# inspect no more than the first 10 lines; if they
23+
# all contain an equal number (>0) of tabs, infer
24+
# that this came from excel and set 'sep' accordingly
25+
lines = text[:10000].split('\n')[:-1][:10]
26+
27+
# Need to remove leading white space, since read_table
28+
# accepts:
29+
# a b
30+
# 0 1 2
31+
# 1 3 4
32+
33+
counts = set([x.lstrip().count('\t') for x in lines])
34+
if len(lines)>1 and len(counts) == 1 and counts.pop() != 0:
35+
kwargs['sep'] = '\t'
36+
37+
if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
38+
kwargs['sep'] = '\s+'
39+
2340
# try to decode (if needed on PY3)
2441
if compat.PY3:
2542
try:

pandas/io/tests/test_clipboard.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from numpy.random import randint
33

44
import nose
5+
import pandas as pd
56

67
from pandas import DataFrame
78
from pandas import read_clipboard
@@ -65,3 +66,37 @@ def test_round_trip_frame_string(self):
6566
def test_round_trip_frame(self):
6667
for dt in self.data_types:
6768
self.check_round_trip_frame(dt)
69+
70+
def test_read_clipboard_infer_excel(self):
71+
from textwrap import dedent
72+
from pandas.util.clipboard import clipboard_set
73+
74+
text = dedent("""
75+
John James Charlie Mingus
76+
1 2
77+
4 Harry Carney
78+
""".strip())
79+
clipboard_set(text)
80+
df = pd.read_clipboard()
81+
82+
# excel data is parsed correctly
83+
self.assertEqual(df.iloc[1][1], 'Harry Carney')
84+
85+
# having differing tab counts doesn't trigger it
86+
text = dedent("""
87+
a\t b
88+
1 2
89+
3 4
90+
""".strip())
91+
clipboard_set(text)
92+
res = pd.read_clipboard()
93+
94+
text = dedent("""
95+
a b
96+
1 2
97+
3 4
98+
""".strip())
99+
clipboard_set(text)
100+
exp = pd.read_clipboard()
101+
102+
tm.assert_frame_equal(res, exp)

0 commit comments

Comments
 (0)