Description
Code Sample, a copy-pastable example if possible
In [2]: store = pd.HDFStore('teststore.h5', 'w')
In [3]: chunk = pd.DataFrame({'V1':['a','b','c','d','e'], 'data': range(5)})
In [4]: store.append('df', chunk)
In [5]: chunk = pd.DataFrame({'V1':[None, None], 'data': [3, 5]})
In [6]: store.append('df', chunk)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-d2de62391194> in <module>()
----> 1 store.append('df', chunk)
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in append(self, key, value, format, append, columns, dropna, **kwargs)
966 kwargs = self._validate_format(format, kwargs)
967 self._write_to_group(key, value, append=append, dropna=dropna,
--> 968 **kwargs)
969
970 def append_to_multiple(self, d, value, selector, data_columns=None,
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
1311
1312 # write the object
-> 1313 s.write(obj=value, append=append, complib=complib, **kwargs)
1314
1315 if s.is_table and index:
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
3867 self.create_axes(axes=axes, obj=obj, validate=append,
3868 min_itemsize=min_itemsize,
-> 3869 **kwargs)
3870
3871 for a in self.axes:
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3539 self.values_axes.append(col)
3540 except (NotImplementedError, ValueError, TypeError) as e:
-> 3541 raise e
3542 except Exception as detail:
3543 raise Exception(
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3534 encoding=self.encoding,
3535 info=self.info,
-> 3536 **kwargs)
3537 col.set_pos(j)
3538
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in set_atom(self, block, block_items, existing_col, min_itemsize, nan_rep, info, encoding, **kwargs)
1894 min_itemsize,
1895 nan_rep,
-> 1896 encoding)
1897
1898 # set as a data block
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in set_atom_string(self, block, block_items, existing_col, min_itemsize, nan_rep, encoding)
1941 # check for column in the values conflicts
1942 if existing_col is not None:
-> 1943 eci = existing_col.validate_col(itemsize)
1944 if eci > itemsize:
1945 itemsize = eci
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in validate_col(self, itemsize)
1633 "column but\nthis column has a limit of [%s]!\n"
1634 "Consider using min_itemsize to preset the sizes on "
-> 1635 "these columns" % (itemsize, self.cname, c.itemsize))
1636 return c.itemsize
1637
ValueError: Trying to store a string with len [3] in [values_block_1] column but
this column has a limit of [1]!
Consider using min_itemsize to preset the sizes on these columns
Problem description
The error message is confusing (particularly if you overlook the fact that your data contains None): it complains about string length and doesn't even provide the right column name (it reports values_block_1 rather than V1).
Expected Output
Something analogous to the error raised when a column contains mixed types:
In [5]: chunk = pd.DataFrame({'V1':['f', 4.5], 'data': [3, 5]})
In [6]: store.append('df', chunk)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-d2de62391194> in <module>()
----> 1 store.append('df', chunk)
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in append(self, key, value, format, append, columns, dropna, **kwargs)
966 kwargs = self._validate_format(format, kwargs)
967 self._write_to_group(key, value, append=append, dropna=dropna,
--> 968 **kwargs)
969
970 def append_to_multiple(self, d, value, selector, data_columns=None,
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
1311
1312 # write the object
-> 1313 s.write(obj=value, append=append, complib=complib, **kwargs)
1314
1315 if s.is_table and index:
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
3867 self.create_axes(axes=axes, obj=obj, validate=append,
3868 min_itemsize=min_itemsize,
-> 3869 **kwargs)
3870
3871 for a in self.axes:
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3539 self.values_axes.append(col)
3540 except (NotImplementedError, ValueError, TypeError) as e:
-> 3541 raise e
3542 except Exception as detail:
3543 raise Exception(
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3534 encoding=self.encoding,
3535 info=self.info,
-> 3536 **kwargs)
3537 col.set_pos(j)
3538
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in set_atom(self, block, block_items, existing_col, min_itemsize, nan_rep, info, encoding, **kwargs)
1894 min_itemsize,
1895 nan_rep,
-> 1896 encoding)
1897
1898 # set as a data block
/home/pietro/nobackup/repo/pandas/pandas/io/pytables.py in set_atom_string(self, block, block_items, existing_col, min_itemsize, nan_rep, encoding)
1926 "Cannot serialize the column [%s] because\n"
1927 "its data contents are [%s] object dtype"
-> 1928 % (item, inferred_type)
1929 )
1930
TypeError: Cannot serialize the column [V1] because
its data contents are [mixed] object dtype
Notice that the error is different again if you try with chunk = pd.DataFrame({'V1':[7, 4.5], 'data': [3, 5]}), which results in ValueError: invalid combinate of [values_axes] on appending data [name->values_block_1,cname->values_block_1,dtype->float64,kind->float,shape->(1, 2)] vs current table [name->values_block_1,cname->values_block_1,dtype->bytes8,kind->string,shape->None].
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.7.0-1-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.utf8
LOCALE: it_IT.UTF-8
pandas: 0.21.0.dev+5.g82b9a172e
pytest: 3.0.6
pip: 9.0.1
setuptools: 33.1.1
Cython: 0.25.2
numpy: 1.12.1
scipy: 0.18.1
xarray: 0.9.2
IPython: 5.1.0.dev
sphinx: 1.4.9
patsy: 0.3.0-dev
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: 1.3.0.dev0
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.0.0
openpyxl: 2.3.0
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.6
lxml: 3.7.1
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.8
s3fs: None
pandas_gbq: None
pandas_datareader: 0.2.1