diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py index 506f334d83d01..09c4acc0ab309 100644 --- a/asv_bench/benchmarks/array.py +++ b/asv_bench/benchmarks/array.py @@ -54,7 +54,7 @@ def time_from_tuples(self): class StringArray: def setup(self): N = 100_000 - values = np.array(["a"] * N, dtype=object) + values = np.array([str(i) for i in range(N)], dtype=object) self.values_obj = np.array(values, dtype="object") self.values_str = np.array(values, dtype="U") self.values_list = values.tolist() @@ -78,7 +78,7 @@ def setup(self, multiple_chunks): import pyarrow as pa except ImportError: raise NotImplementedError - strings = np.array(["a"] * 10_000, dtype=object) + strings = np.array([str(i) for i in range(10_000)], dtype=object) if multiple_chunks: chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)] self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks)) @@ -125,7 +125,7 @@ def setup(self, dtype, hasna): elif dtype == "int64[pyarrow]": data = np.arange(N) elif dtype == "string[pyarrow]": - data = np.array(["a"] * N, dtype=object) + data = np.array([str(i) for i in range(N)], dtype=object) elif dtype == "timestamp[ns][pyarrow]": data = pd.date_range("2000-01-01", freq="s", periods=N) else: diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 76cc803ecc2da..288369145576e 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -104,7 +104,7 @@ def setup(self, dtype): data = np.arange(N) na_value = NA elif dtype in ("string", "string[pyarrow]"): - data = np.array(["a"] * N, dtype=object) + data = np.array([str(i) * 5 for i in range(N)], dtype=object) na_value = NA else: raise NotImplementedError diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index 712d2afb81d08..d70d9d0aa5227 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -34,7 +34,9 @@ class Construction: dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object} def setup(self, pd_type, dtype): - series_arr = np.array(["a" * 10] * 100_000, dtype=self.dtype_mapping[dtype]) + series_arr = np.array( + [str(i) * 10 for i in range(100_000)], dtype=self.dtype_mapping[dtype] + ) if pd_type == "series": self.arr = series_arr elif pd_type == "frame": @@ -274,7 +276,7 @@ def time_iter(self, dtype): class StringArrayConstruction: def setup(self): - self.series_arr = np.array(["a" * 10] * 10**5, dtype=object) + self.series_arr = np.array([str(i) * 10 for i in range(10**5)], dtype=object) self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)]) def time_string_array_construction(self):