Skip to content

Commit

Permalink
Use unique values
Browse files (browse the repository at this point in the history)
  • Loading branch information
mroeschke committed Aug 2, 2023
1 parent 467941f commit e360fea
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 6 deletions.
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/array.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -54,7 +54,7 @@ def time_from_tuples(self):
class StringArray:
def setup(self):
N = 100_000
- values = np.array(["a"] * N, dtype=object)
+ values = np.array([str(i) for i in range(N)], dtype=object)
self.values_obj = np.array(values, dtype="object")
self.values_str = np.array(values, dtype="U")
self.values_list = values.tolist()
Expand All @@ -78,7 +78,7 @@ def setup(self, multiple_chunks):
import pyarrow as pa
except ImportError:
raise NotImplementedError
- strings = np.array(["a"] * 10_000, dtype=object)
+ strings = np.array([str(i) for i in range(10_000)], dtype=object)
if multiple_chunks:
chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
Expand Down Expand Up @@ -125,7 +125,7 @@ def setup(self, dtype, hasna):
elif dtype == "int64[pyarrow]":
data = np.arange(N)
elif dtype == "string[pyarrow]":
- data = np.array(["a"] * N, dtype=object)
+ data = np.array([str(i) for i in range(N)], dtype=object)
elif dtype == "timestamp[ns][pyarrow]":
data = pd.date_range("2000-01-01", freq="s", periods=N)
else:
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/series_methods.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,7 +104,7 @@ def setup(self, dtype):
data = np.arange(N)
na_value = NA
elif dtype in ("string", "string[pyarrow]"):
- data = np.array(["a"] * N, dtype=object)
+ data = np.array([str(i) * 5 for i in range(N)], dtype=object)
na_value = NA
else:
raise NotImplementedError
Expand Down
6 changes: 4 additions & 2 deletions asv_bench/benchmarks/strings.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,7 +34,9 @@ class Construction:
dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}

def setup(self, pd_type, dtype):
- series_arr = np.array(["a" * 10] * 100_000, dtype=self.dtype_mapping[dtype])
+ series_arr = np.array(
+ [str(i) * 10 for i in range(100_000)], dtype=self.dtype_mapping[dtype]
+ )
if pd_type == "series":
self.arr = series_arr
elif pd_type == "frame":
Expand Down Expand Up @@ -274,7 +276,7 @@ def time_iter(self, dtype):

class StringArrayConstruction:
def setup(self):
- self.series_arr = np.array(["a" * 10] * 10**5, dtype=object)
+ self.series_arr = np.array([str(i) * 10 for i in range(10**5)], dtype=object)
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])

def time_string_array_construction(self):
Expand Down

0 comments on commit e360fea

Please sign in to comment.