diff --git a/RELEASE.rst b/RELEASE.rst index 009bcb8c5d5d1..a9c1378ff5eb1 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -167,9 +167,11 @@ pandas 0.11.0 (so not using numexpr filtering, but isin filtering) - Internally, change all variables to be private-like (now have leading underscore) - - fixes for query parsing to correctly interpret boolean and != (GH2849_, GH2973_) - - fixes for pathological case on SparseSeries with 0-len array and compression (GH2931_) - - fixes bug with writing rows if part of a block was all-nan (GH3012_) + - Fixes for query parsing to correctly interpret boolean and != (GH2849_, GH2973_) + - Fixes for pathological case on SparseSeries with 0-len array and compression (GH2931_) + - Fixes bug with writing rows if part of a block was all-nan (GH3012_) + - Exceptions are now ValueError or TypeError as needed + - A table will now raise if min_itemsize contains fields which are not queryables - Bug showing up in applymap where some object type columns are converted (GH2909_) had an incorrect default in convert_objects diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fd9127efa72df..84a4121387964 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -458,9 +458,9 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star nrows = tbls[0].nrows for t in tbls: if t.nrows != nrows: - raise Exception("all tables must have exactly the same nrows!") + raise ValueError("all tables must have exactly the same nrows!") if not t.is_table: - raise Exception("object [%s] is not a table, and cannot be used in all select as multiple" % t.pathname) + raise TypeError("object [%s] is not a table, and cannot be used in all select as multiple" % t.pathname) # select coordinates from the selector table c = self.select_as_coordinates(selector, where, start=start, stop=stop) @@ -526,7 +526,7 @@ def remove(self, key, where=None, start=None, stop=None): except: if where is not None: - raise Exception("trying to remove a node with a non-None where clause!") + raise ValueError("trying to remove a node with a non-None where clause!") # we are actually trying to remove a node (with children) s = self.get_node(key) @@ -544,7 +544,7 @@ def remove(self, key, where=None, start=None, stop=None): # delete from the table else: if not s.is_table: - raise Exception('can only remove with where on objects written as tables') + raise ValueError('can only remove with where on objects written as tables') return s.delete(where = where, start=start, stop=stop) def append(self, key, value, columns=None, **kwargs): @@ -597,10 +597,10 @@ def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, * raise Exception("axes is currently not accepted as a paremter to append_to_multiple; you can create the tables indepdently instead") if not isinstance(d, dict): - raise Exception("append_to_multiple must have a dictionary specified as the way to split the value") + raise ValueError("append_to_multiple must have a dictionary specified as the way to split the value") if selector not in d: - raise Exception("append_to_multiple requires a selector that is in passed dict") + raise ValueError("append_to_multiple requires a selector that is in passed dict") # figure out the splitting axis (the non_index_axis) axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0] @@ -611,7 +611,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, * for k, v in d.items(): if v is None: if remain_key is not None: - raise Exception("append_to_multiple can only have one value in d that is None") + raise ValueError("append_to_multiple can only have one value in d that is None") remain_key = k else: remain_values.extend(v) @@ -655,7 +655,7 @@ def create_table_index(self, key, **kwargs): if s is None: return if not s.is_table: - raise Exception("cannot create table index on a non-table") + raise TypeError("cannot create table index on a non-table") s.create_index(**kwargs) def groups(self): @@ -727,8 +727,8 @@ def _create_storer(self, group, value = None, table = False, append = False, **k """ return a suitable Storer class to operate """ def error(t): - raise NotImplementedError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % - (t,group,type(value),table,append,kwargs)) + raise TypeError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % + (t,group,type(value),table,append,kwargs)) pt = getattr(group._v_attrs,'pandas_type',None) tt = getattr(group._v_attrs,'table_type',None) @@ -742,7 +742,7 @@ def error(t): pt = 'frame_table' tt = 'generic_table' else: - raise Exception("cannot create a storer if the object is not existing nor a value are passed") + raise TypeError("cannot create a storer if the object is not existing nor a value are passed") else: try: @@ -1044,8 +1044,10 @@ def validate_col(self, itemsize=None): if itemsize is None: itemsize = self.itemsize if c.itemsize < itemsize: - raise Exception("[%s] column has a min_itemsize of [%s] but itemsize [%s] is required!" - % (self.cname, itemsize, c.itemsize)) + raise ValueError("Trying to store a string with len [%s] in [%s] column but\n" + "this column has a limit of [%s]!\n" + "Consider using min_itemsize to preset the sizes on these columns" + % (itemsize,self.cname, c.itemsize)) return c.itemsize return None @@ -1176,11 +1178,11 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs): if inferred_type == 'datetime64': self.set_atom_datetime64(block) elif inferred_type == 'date': - raise NotImplementedError( - "date is not implemented as a table column") + raise TypeError( + "[date] is not implemented as a table column") elif inferred_type == 'unicode': - raise NotImplementedError( - "unicode is not implemented as a table column") + raise TypeError( + "[unicode] is not implemented as a table column") # this is basically a catchall; if say a datetime64 has nans then will # end up here ### @@ -1209,9 +1211,9 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep): col = block.get(item) inferred_type = lib.infer_dtype(col.ravel()) if inferred_type != 'string': - raise NotImplementedError("cannot serialize the column [%s] because " - "its data contents are [%s] object dtype" % - (item,inferred_type)) + raise TypeError("Cannot serialize the column [%s] because\n" + "its data contents are [%s] object dtype" % + (item,inferred_type)) # itemsize is the maximum length of a string (along any dimension) @@ -1268,13 +1270,13 @@ def validate_attr(self, append): existing_fields = getattr(self.attrs, self.kind_attr, None) if (existing_fields is not None and existing_fields != list(self.values)): - raise Exception("appended items do not match existing items" + raise ValueError("appended items do not match existing items" " in table!") existing_dtype = getattr(self.attrs, self.dtype_attr, None) if (existing_dtype is not None and existing_dtype != self.dtype): - raise Exception("appended items dtype do not match existing items dtype" + raise ValueError("appended items dtype do not match existing items dtype" " in table!") def convert(self, values, nan_rep): @@ -1497,7 +1499,7 @@ def delete(self, where = None, **kwargs): self._handle.removeNode(self.group, recursive=True) return None - raise NotImplementedError("cannot delete on an abstract storer") + raise TypeError("cannot delete on an abstract storer") class GenericStorer(Storer): """ a generified storer version """ @@ -2045,7 +2047,7 @@ def validate(self, other): for c in ['index_axes','non_index_axes','values_axes']: if getattr(self,c,None) != getattr(other,c,None): - raise Exception("invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c,getattr(self,c,None),getattr(other,c,None))) + raise ValueError("invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c,getattr(self,c,None),getattr(other,c,None))) @property def nrows_expected(self): @@ -2132,6 +2134,21 @@ def validate_version(self, where = None): ws = incompatibility_doc % '.'.join([ str(x) for x in self.version ]) warnings.warn(ws, IncompatibilityWarning) + def validate_min_itemsize(self, min_itemsize): + """ validate the min_itemisze doesn't contain items that are not in the axes + this needs data_columns to be defined """ + if min_itemsize is None: return + if not isinstance(min_itemsize, dict): return + + q = self.queryables() + for k, v in min_itemsize.items(): + + # ok, apply generally + if k == 'values': + continue + if k not in q: + raise ValueError("min_itemsize has [%s] which is not an axis or data_column" % k) + @property def indexables(self): """ create/cache the indexables if they don't exist """ @@ -2262,8 +2279,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, try: axes = _AXES_MAP[type(obj)] except: - raise NotImplementedError("cannot properly create the storer for: [group->%s,value->%s]" % - (self.group._v_name,type(obj))) + raise TypeError("cannot properly create the storer for: [group->%s,value->%s]" % + (self.group._v_name,type(obj))) # map axes to numbers axes = [obj._get_axis_number(a) for a in axes] @@ -2280,7 +2297,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, # currently support on ndim-1 axes if len(axes) != self.ndim - 1: - raise Exception("currently only support ndim-1 indexers in an AppendableTable") + raise ValueError("currently only support ndim-1 indexers in an AppendableTable") # create according to the new data self.non_index_axes = [] @@ -2370,7 +2387,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, try: existing_col = existing_table.values_axes[i] except: - raise Exception("Incompatible appended table [%s] with existing table [%s]" % + raise ValueError("Incompatible appended table [%s] with existing table [%s]" % (blocks,existing_table.values_axes)) else: existing_col = None @@ -2386,12 +2403,15 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, col.set_pos(j) self.values_axes.append(col) - except (NotImplementedError): - raise + except (NotImplementedError, ValueError, TypeError), e: + raise e except (Exception), detail: raise Exception("cannot find the correct atom type -> [dtype->%s,items->%s] %s" % (b.dtype.name, b.items, str(detail))) j += 1 + # validate our min_itemsize + self.validate_min_itemsize(min_itemsize) + # validate the axes if we have an existing table if validate: self.validate(existing_table) @@ -2433,7 +2453,7 @@ def process_filter(field, filt): takers = op(values,filt) return obj.ix._getitem_axis(takers,axis=axis_number) - raise Exception("cannot find the field [%s] for filtering!" % field) + raise ValueError("cannot find the field [%s] for filtering!" % field) obj = process_filter(field, filt) @@ -3111,12 +3131,12 @@ def __init__(self, field, op=None, value=None, queryables=None): self.value = op else: - raise Exception( + raise ValueError( "Term does not understand the supplied field [%s]" % field) # we have valid fields if self.field is None or self.op is None or self.value is None: - raise Exception("Could not create this term [%s]" % str(self)) + raise ValueError("Could not create this term [%s]" % str(self)) # = vs == if self.op == '=': @@ -3125,7 +3145,7 @@ def __init__(self, field, op=None, value=None, queryables=None): # we have valid conditions if self.op in ['>', '>=', '<', '<=']: if hasattr(self.value, '__iter__') and len(self.value) > 1: - raise Exception("an inequality condition cannot have multiple values [%s]" % str(self)) + raise ValueError("an inequality condition cannot have multiple values [%s]" % str(self)) if not hasattr(self.value, '__iter__'): self.value = [self.value] @@ -3157,7 +3177,7 @@ def eval(self): """ set the numexpr expression for this term """ if not self.is_valid: - raise Exception("query term is not valid [%s]" % str(self)) + raise ValueError("query term is not valid [%s]" % str(self)) # convert values if we are in the table if self.is_in_table: @@ -3199,7 +3219,7 @@ def eval(self): else: - raise Exception("passing a filterable condition to a non-table indexer [%s]" % str(self)) + raise TypeError("passing a filterable condition to a non-table indexer [%s]" % str(self)) def convert_value(self, v): """ convert the expression that is in the term to something that is accepted by pytables """ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 158cb351678f3..bd90323daf4bf 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -511,7 +511,7 @@ def test_append_frame_column_oriented(self): tm.assert_frame_equal(expected, result) # this isn't supported - self.assertRaises(Exception, store.select, 'df1', ( + self.assertRaises(TypeError, store.select, 'df1', ( 'columns=A', Term('index', '>', df.index[4]))) # selection on the non-indexable @@ -551,7 +551,7 @@ def check_indexers(key, indexers): # pass incorrect number of axes store.remove('p4d') - self.assertRaises(Exception, store.append, 'p4d', p4d.ix[ + self.assertRaises(ValueError, store.append, 'p4d', p4d.ix[ :, :, :10, :], axes=['major_axis', 'minor_axis']) # different than default indexables #1 @@ -615,11 +615,11 @@ def check_col(key,name,size): # apply the wrong field (similar to #1) store.append('s3', wp, min_itemsize={'major_axis': 20}) - self.assertRaises(Exception, store.append, 's3') + self.assertRaises(ValueError, store.append, 's3', wp2) # test truncation of bigger strings store.append('s4', wp) - self.assertRaises(Exception, store.append, 's4', wp2) + self.assertRaises(ValueError, store.append, 's4', wp2) # avoid truncation on elements df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) @@ -644,7 +644,7 @@ def check_col(key,name,size): store.append('df_new', df) df_new = DataFrame( [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) - self.assertRaises(Exception, store.append, 'df_new', df_new) + self.assertRaises(ValueError, store.append, 'df_new', df_new) # with nans store.remove('df') @@ -668,6 +668,18 @@ def check_col(key,name,size): store.append('df', df[5:], min_itemsize=200) tm.assert_frame_equal(store['df'], df) + # invalid min_itemsize keys + + df = DataFrame(['foo','foo','foo','barh','barh','barh'],columns=['A']) + + store.remove('df') + self.assertRaises(ValueError, store.append, 'df', df, min_itemsize={'foo' : 20, 'foobar' : 20}) + + # invalid sizes + store.remove('df') + store.append('df', df[:3], min_itemsize=3) + self.assertRaises(ValueError, store.append, 'df', df[3:]) + def test_append_with_data_columns(self): with ensure_clean(self.path) as store: @@ -842,7 +854,7 @@ def col(t,column): # try to index a non-table store.remove('f2') store.put('f2', df) - self.assertRaises(Exception, store.create_table_index, 'f2') + self.assertRaises(TypeError, store.create_table_index, 'f2') # try to change the version supports flag from pandas.io import pytables @@ -970,7 +982,7 @@ def test_append_diff_item_order(self): with ensure_clean(self.path) as store: store.put('panel', wp1, table=True) - self.assertRaises(Exception, store.put, 'panel', wp2, + self.assertRaises(ValueError, store.put, 'panel', wp2, append=True) def test_append_hierarchical(self): @@ -993,17 +1005,17 @@ def test_append_misc(self): # unsuported data types for non-tables p4d = tm.makePanel4D() - self.assertRaises(Exception, store.put,'p4d',p4d) + self.assertRaises(TypeError, store.put,'p4d',p4d) # unsupported data type for table s = tm.makeStringSeries() - self.assertRaises(Exception, store.append,'s',s) + self.assertRaises(TypeError, store.append,'s',s) # unsuported data types - self.assertRaises(Exception, store.put,'abc',None) - self.assertRaises(Exception, store.put,'abc','123') - self.assertRaises(Exception, store.put,'abc',123) - self.assertRaises(Exception, store.put,'abc',np.arange(5)) + self.assertRaises(TypeError, store.put,'abc',None) + self.assertRaises(TypeError, store.put,'abc','123') + self.assertRaises(TypeError, store.put,'abc',123) + self.assertRaises(TypeError, store.put,'abc',np.arange(5)) df = tm.makeDataFrame() store.append('df', df, chunksize=1) @@ -1024,12 +1036,12 @@ def test_append_raise(self): df = tm.makeDataFrame() df['invalid'] = [['a']] * len(df) self.assert_(df.dtypes['invalid'] == np.object_) - self.assertRaises(NotImplementedError, store.append,'df',df) + self.assertRaises(TypeError, store.append,'df',df) # multiple invalid columns df['invalid2'] = [['a']] * len(df) df['invalid3'] = [['a']] * len(df) - self.assertRaises(NotImplementedError, store.append,'df',df) + self.assertRaises(TypeError, store.append,'df',df) # datetime with embedded nans as object df = tm.makeDataFrame() @@ -1037,20 +1049,20 @@ def test_append_raise(self): s[0:5] = np.nan df['invalid'] = s self.assert_(df.dtypes['invalid'] == np.object_) - self.assertRaises(NotImplementedError, store.append,'df', df) + self.assertRaises(TypeError, store.append,'df', df) # directy ndarray - self.assertRaises(NotImplementedError, store.append,'df',np.arange(10)) + self.assertRaises(TypeError, store.append,'df',np.arange(10)) # series directly - self.assertRaises(NotImplementedError, store.append,'df',Series(np.arange(10))) + self.assertRaises(TypeError, store.append,'df',Series(np.arange(10))) # appending an incompatbile table df = tm.makeDataFrame() store.append('df',df) df['foo'] = 'foo' - self.assertRaises(Exception, store.append,'df',df) + self.assertRaises(ValueError, store.append,'df',df) def test_table_index_incompatible_dtypes(self): df1 = DataFrame({'a': [1, 2, 3]}) @@ -1059,7 +1071,7 @@ def test_table_index_incompatible_dtypes(self): with ensure_clean(self.path) as store: store.put('frame', df1, table=True) - self.assertRaises(Exception, store.put, 'frame', df2, + self.assertRaises(TypeError, store.put, 'frame', df2, table=True, append=True) def test_table_values_dtypes_roundtrip(self): @@ -1074,7 +1086,7 @@ def test_table_values_dtypes_roundtrip(self): assert df2.dtypes == store['df_i8'].dtypes # incompatible dtype - self.assertRaises(Exception, store.append, 'df_i8', df1) + self.assertRaises(ValueError, store.append, 'df_i8', df1) # check creation/storage/retrieval of float32 (a bit hacky to actually create them thought) df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A']) @@ -1157,7 +1169,7 @@ def test_unimplemented_dtypes_table_columns(self): df = tm.makeDataFrame() df[n] = f self.assertRaises( - NotImplementedError, store.append, 'df1_%s' % n, df) + TypeError, store.append, 'df1_%s' % n, df) # frame df = tm.makeDataFrame() @@ -1168,7 +1180,7 @@ def test_unimplemented_dtypes_table_columns(self): with ensure_clean(self.path) as store: # this fails because we have a date in the object block...... - self.assertRaises(Exception, store.append, 'df_unimplemented', df) + self.assertRaises(TypeError, store.append, 'df_unimplemented', df) def test_remove(self): @@ -1232,12 +1244,12 @@ def test_remove_where(self): # non - empty where store.remove('wp') store.put('wp', wp, table=True) - self.assertRaises(Exception, store.remove, + self.assertRaises(ValueError, store.remove, 'wp', ['foo']) # selectin non-table with a where # store.put('wp2', wp, table=False) - # self.assertRaises(Exception, store.remove, + # self.assertRaises(ValueError, store.remove, # 'wp2', [('column', ['A', 'D'])]) def test_remove_crit(self): @@ -1753,7 +1765,7 @@ def test_select(self): tm.assert_panel_equal(expected, result) # selectin non-table with a where - # self.assertRaises(Exception, store.select, + # self.assertRaises(ValueError, store.select, # 'wp2', ('column', ['A', 'D'])) # select with columns= @@ -1983,11 +1995,11 @@ def test_frame_select(self): df = tm.makeTimeDataFrame() store.append('df_time', df) self.assertRaises( - Exception, store.select, 'df_time', [Term("index>0")]) + ValueError, store.select, 'df_time', [Term("index>0")]) # can't select if not written as table # store['frame'] = df - # self.assertRaises(Exception, store.select, + # self.assertRaises(ValueError, store.select, # 'frame', [crit1, crit2]) def test_string_select(self): @@ -2130,12 +2142,12 @@ def test_append_to_multiple(self): with ensure_clean(self.path) as store: # exceptions - self.assertRaises(Exception, store.append_to_multiple, + self.assertRaises(ValueError, store.append_to_multiple, {'df1': ['A', 'B'], 'df2': None}, df, selector='df3') - self.assertRaises(Exception, store.append_to_multiple, + self.assertRaises(ValueError, store.append_to_multiple, {'df1': None, 'df2': None}, df, selector='df3') self.assertRaises( - Exception, store.append_to_multiple, 'df1', df, 'df1') + ValueError, store.append_to_multiple, 'df1', df, 'df1') # regular operation store.append_to_multiple( @@ -2191,7 +2203,7 @@ def test_select_as_multiple(self): # test excpection for diff rows store.append('df3', tm.makeTimeDataFrame(nper=50)) - self.assertRaises(Exception, store.select_as_multiple, + self.assertRaises(ValueError, store.select_as_multiple, ['df1','df3'], where=['A>0', 'B>0'], selector='df1') def test_start_stop(self): diff --git a/pandas/lib.pyx b/pandas/lib.pyx index e12b524dda736..05171523764c8 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -758,13 +758,16 @@ def max_len_string_array(ndarray[object, ndim=1] arr): cdef: int i, m, l length = arr.shape[0] + object v m = 0 for i from 0 <= i < length: - l = len(arr[i]) + v = arr[i] + if PyString_Check(v): + l = len(v) - if l > m: - m = l + if l > m: + m = l return m