diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py
index 5aa8f76917797..d7b3be25a18b9 100644
--- a/asv_bench/benchmarks/hdfstore_bench.py
+++ b/asv_bench/benchmarks/hdfstore_bench.py
@@ -1,34 +1,40 @@
-from .pandas_vb_common import *
-import os
+import numpy as np
+from pandas import DataFrame, Panel, date_range, HDFStore
+import pandas.util.testing as tm
 
+from .pandas_vb_common import BaseIO, setup  # noqa
 
 
-class HDF5(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.index = tm.makeStringIndex(25000)
-        self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000),},
-                            index=self.index)
-        self.df_mixed = DataFrame(
-            {'float1': randn(25000), 'float2': randn(25000),
-             'string1': (['foo'] * 25000),
-             'bool1': ([True] * 25000),
-             'int1': np.random.randint(0, 250000, size=25000),},
-            index=self.index)
+class HDF5(BaseIO):
 
-        self.df_wide = DataFrame(np.random.randn(25000, 100))
-
-        self.df2 = DataFrame({'float1': randn(25000), 'float2': randn(25000)},
-                             index=date_range('1/1/2000', periods=25000))
-        self.df_wide2 = DataFrame(np.random.randn(25000, 100),
-                                  index=date_range('1/1/2000', periods=25000))
+    goal_time = 0.2
 
-        self.df_dc = DataFrame(np.random.randn(10000, 10),
-                               columns=[('C%03d' % i) for i in range(10)])
+    def setup(self):
+        N = 25000
+        index = tm.makeStringIndex(N)
+        self.df = DataFrame({'float1': np.random.randn(N),
+                             'float2': np.random.randn(N)},
+                            index=index)
+        self.df_mixed = DataFrame({'float1': np.random.randn(N),
+                                   'float2': np.random.randn(N),
+                                   'string1': ['foo'] * N,
+                                   'bool1': [True] * N,
+                                   'int1': np.random.randint(0, N, size=N)},
+                                  index=index)
+        self.df_wide = DataFrame(np.random.randn(N, 100))
+        self.start_wide = self.df_wide.index[10000]
+        self.stop_wide = self.df_wide.index[15000]
+        self.df2 = DataFrame({'float1': np.random.randn(N),
+                              'float2': np.random.randn(N)},
+                             index=date_range('1/1/2000', periods=N))
+        self.start = self.df2.index[10000]
+        self.stop = self.df2.index[15000]
+        self.df_wide2 = DataFrame(np.random.randn(N, 100),
+                                  index=date_range('1/1/2000', periods=N))
+        self.df_dc = DataFrame(np.random.randn(N, 10),
+                               columns=['C%03d' % i for i in range(10)])
 
         self.f = '__test__.h5'
-        self.remove(self.f)
 
         self.store = HDFStore(self.f)
         self.store.put('fixed', self.df)
@@ -42,12 +48,6 @@ def teardown(self):
         self.store.close()
         self.remove(self.f)
 
-    def remove(self, f):
-        try:
-            os.remove(f)
-        except:
-            pass
-
     def time_read_store(self):
         self.store.get('fixed')
 
@@ -82,14 +82,12 @@ def time_write_store_table_dc(self):
         self.store.append('table_dc_write', self.df_dc, data_columns=True)
 
     def time_query_store_table_wide(self):
-        start = self.df_wide2.index[10000]
-        stop = self.df_wide2.index[15000]
-        self.store.select('table_wide', where="index > start and index < stop")
+        self.store.select('table_wide', where="index > self.start_wide and "
+                                              "index < self.stop_wide")
 
     def time_query_store_table(self):
-        start = self.df2.index[10000]
-        stop = self.df2.index[15000]
-        self.store.select('table', where="index > start and index < stop")
+        self.store.select('table', where="index > self.start and "
+                                         "index < self.stop")
 
     def time_store_repr(self):
         repr(self.store)
@@ -101,16 +99,16 @@ def time_store_info(self):
         self.store.info()
 
 
-class HDF5Panel(object):
+class HDF5Panel(BaseIO):
+
     goal_time = 0.2
 
     def setup(self):
         self.f = '__test__.h5'
-        self.p = Panel(randn(20, 1000, 25),
-                       items=[('Item%03d' % i) for i in range(20)],
+        self.p = Panel(np.random.randn(20, 1000, 25),
+                       items=['Item%03d' % i for i in range(20)],
                        major_axis=date_range('1/1/2000', periods=1000),
-                       minor_axis=[('E%03d' % i) for i in range(25)])
-        self.remove(self.f)
+                       minor_axis=['E%03d' % i for i in range(25)])
         self.store = HDFStore(self.f)
         self.store.append('p1', self.p)
 
@@ -118,12 +116,6 @@ def teardown(self):
         self.store.close()
         self.remove(self.f)
 
-    def remove(self, f):
-        try:
-            os.remove(f)
-        except:
-            pass
-
     def time_read_store_table_panel(self):
         self.store.select('p1')
 
diff --git a/asv_bench/benchmarks/io_bench.py b/asv_bench/benchmarks/io_bench.py
index c718b13912e73..e8112cc41f032 100644
--- a/asv_bench/benchmarks/io_bench.py
+++ b/asv_bench/benchmarks/io_bench.py
@@ -8,23 +8,7 @@
 import timeit
 
 
-class _BenchTeardown(object):
-    """
-    base class for teardown method implementation
-    """
-    fname = None
-
-    def remove(self, f):
-        try:
-            os.remove(f)
-        except:
-            pass
-
-    def teardown(self):
-        self.remove(self.fname)
-
-
-class frame_to_csv(_BenchTeardown):
+class frame_to_csv(BaseIO):
     goal_time = 0.2
     fname = '__test__.csv'
 
@@ -35,7 +19,7 @@ def time_frame_to_csv(self):
         self.df.to_csv(self.fname)
 
 
-class frame_to_csv2(_BenchTeardown):
+class frame_to_csv2(BaseIO):
     goal_time = 0.2
     fname = '__test__.csv'
 
@@ -49,7 +33,7 @@ def time_frame_to_csv2(self):
         self.df.to_csv(self.fname)
 
 
-class frame_to_csv_date_formatting(_BenchTeardown):
+class frame_to_csv_date_formatting(BaseIO):
     goal_time = 0.2
     fname = '__test__.csv'
 
@@ -61,7 +45,7 @@ def time_frame_to_csv_date_formatting(self):
         self.data.to_csv(self.fname, date_format='%Y%m%d')
 
 
-class frame_to_csv_mixed(_BenchTeardown):
+class frame_to_csv_mixed(BaseIO):
     goal_time = 0.2
     fname = '__test__.csv'
 
@@ -114,7 +98,7 @@ def time_read_csv_infer_datetime_format_ymd(self):
         read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True)
 
 
-class read_csv_skiprows(_BenchTeardown):
+class read_csv_skiprows(BaseIO):
     goal_time = 0.2
     fname = '__test__.csv'
 
@@ -127,7 +111,7 @@ def time_read_csv_skiprows(self):
         read_csv(self.fname, skiprows=10000)
 
 
-class read_csv_standard(_BenchTeardown):
+class read_csv_standard(BaseIO):
     goal_time = 0.2
     fname = '__test__.csv'
 
@@ -174,7 +158,7 @@ def time_read_uint64_na_values(self):
         read_csv(StringIO(self.data1), header=None, na_values=self.na_values)
 
 
-class write_csv_standard(_BenchTeardown):
+class write_csv_standard(BaseIO):
     goal_time = 0.2
     fname = '__test__.csv'
 
@@ -218,14 +202,14 @@ def time_read_nrows(self, compression, engine):
                  compression=compression, engine=engine)
 
 
-class read_json_lines(_BenchTeardown):
+class read_json_lines(BaseIO):
     goal_time = 0.2
     fname = "__test__.json"
 
     def setup(self):
         self.N = 100000
         self.C = 5
-        self.df = DataFrame({('float{0}'.format(i), randn(self.N)) for i in range(self.C)})
+        self.df = DataFrame({'float{0}'.format(i): randn(self.N) for i in range(self.C)})
         self.df.to_json(self.fname,orient="records",lines=True)
 
     def time_read_json_lines(self):
diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py
index 62eb826418030..74517f184ae6f 100644
--- a/asv_bench/benchmarks/pandas_vb_common.py
+++ b/asv_bench/benchmarks/pandas_vb_common.py
@@ -1,3 +1,4 @@
+import os
 from pandas import *
 import pandas as pd
 from numpy.random import randn
@@ -19,6 +20,25 @@
 def setup(*args, **kwargs):
     np.random.seed(1234)
 
+
+class BaseIO(object):
+    """
+    Base class for IO benchmarks
+    """
+    fname = None
+
+    def remove(self, f):
+        """Remove created files"""
+        try:
+            os.remove(f)
+        except OSError:
+            # On Windows, attempting to remove a file that is in use
+            # causes an exception to be raised
+            pass
+
+    def teardown(self):
+        self.remove(self.fname)
+
 # try em until it works!
 for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
     try: