File tree 4 files changed +62
-3
lines changed 4 files changed +62
-3
lines changed Original file line number Diff line number Diff line change @@ -56,12 +56,16 @@ New features
56
56
API Changes
57
57
~~~~~~~~~~~
58
58
59
+
59
60
Experimental Features
60
61
~~~~~~~~~~~~~~~~~~~~~
61
62
62
63
Improvements to existing features
63
64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
64
65
66
+ - pd.read_clipboard will, if 'sep' is unspecified, try to detect data copied from a spreadsheet
67
+ and parse accordingly. (:issue:`6223`)
68
+
65
69
.. _release.bug_fixes-0.14.0:
66
70
67
71
Bug Fixes
Original file line number Diff line number Diff line change @@ -28,10 +28,13 @@ There are no deprecations of prior behavior in 0.14.0
28
28
Enhancements
29
29
~~~~~~~~~~~~
30
30
31
+ - pd.read_clipboard will, if 'sep' is unspecified, try to detect data copied from a spreadsheet
32
+ and parse accordingly. (:issue:`6223`)
33
+
34
+
31
35
Performance
32
36
~~~~~~~~~~~
33
37
34
-
35
38
Experimental
36
39
~~~~~~~~~~~~
37
40
Original file line number Diff line number Diff line change @@ -14,12 +14,29 @@ def read_clipboard(**kwargs): # pragma: no cover
14
14
-------
15
15
parsed : DataFrame
16
16
"""
17
- if kwargs .get ('sep' ) is None and kwargs .get ('delim_whitespace' ) is None :
18
- kwargs ['sep' ] = '\s+'
19
17
from pandas .util .clipboard import clipboard_get
20
18
from pandas .io .parsers import read_table
21
19
text = clipboard_get ()
22
20
21
+ # Excel copies into clipboard with \t separation
22
+ # inspect no more than the first 10 lines, if they
23
+ # all contain an equal number (>0) of tabs, infer
24
+ # that this came from excel and set 'sep' accordingly
25
+ lines = text [:10000 ].split ('\n ' )[:- 1 ][:10 ]
26
+
27
+ # Need to remove leading white space, since read_table
28
+ # accepts:
29
+ # a b
30
+ # 0 1 2
31
+ # 1 3 4
32
+
33
+ counts = set ([x .lstrip ().count ('\t ' ) for x in lines ])
34
+ if len (lines )> 1 and len (counts ) == 1 and counts .pop () != 0 :
35
+ kwargs ['sep' ] = '\t '
36
+
37
+ if kwargs .get ('sep' ) is None and kwargs .get ('delim_whitespace' ) is None :
38
+ kwargs ['sep' ] = '\s+'
39
+
23
40
# try to decode (if needed on PY3)
24
41
if compat .PY3 :
25
42
try :
Original file line number Diff line number Diff line change 2
2
from numpy .random import randint
3
3
4
4
import nose
5
+ import pandas as pd
5
6
6
7
from pandas import DataFrame
7
8
from pandas import read_clipboard
@@ -65,3 +66,37 @@ def test_round_trip_frame_string(self):
65
66
def test_round_trip_frame (self ):
66
67
for dt in self .data_types :
67
68
self .check_round_trip_frame (dt )
69
+
70
+ def test_read_clipboard_infer_excel (self ):
71
+ from textwrap import dedent
72
+ from pandas .util .clipboard import clipboard_set
73
+
74
+ text = dedent ("""
75
+ John James Charlie Mingus
76
+ 1 2
77
+ 4 Harry Carney
78
+ """ .strip ())
79
+ clipboard_set (text )
80
+ df = pd .read_clipboard ()
81
+
82
+ # excel data is parsed correctly
83
+ self .assertEqual (df .iloc [1 ][1 ], 'Harry Carney' )
84
+
85
+ # having differing tab counts across lines doesn't trigger it
86
+ text = dedent ("""
87
+ a\t b
88
+ 1 2
89
+ 3 4
90
+ """ .strip ())
91
+ clipboard_set (text )
92
+ res = pd .read_clipboard ()
93
+
94
+ text = dedent ("""
95
+ a b
96
+ 1 2
97
+ 3 4
98
+ """ .strip ())
99
+ clipboard_set (text )
100
+ exp = pd .read_clipboard ()
101
+
102
+ tm .assert_frame_equal (res , exp )
You can’t perform that action at this time.
0 commit comments