diff --git a/examples/cfd/lid_driven_cavity_2d.py b/examples/cfd/lid_driven_cavity_2d.py index dfb1092..d790c2c 100644 --- a/examples/cfd/lid_driven_cavity_2d.py +++ b/examples/cfd/lid_driven_cavity_2d.py @@ -80,7 +80,8 @@ def run(self, num_steps, post_process_interval=100): def post_process(self, i): # Write the results. We'll use JAX backend for the post-processing if not isinstance(self.f_0, jnp.ndarray): - f_0 = wp.to_jax(self.f_0) + # If the backend is warp, we need to drop the last dimension added by warp for 2D simulations + f_0 = wp.to_jax(self.f_0)[..., 0] else: f_0 = self.f_0 diff --git a/examples/cfd/windtunnel_3d.py b/examples/cfd/windtunnel_3d.py index c83e2c9..96c79f7 100644 --- a/examples/cfd/windtunnel_3d.py +++ b/examples/cfd/windtunnel_3d.py @@ -74,8 +74,8 @@ def define_boundary_indices(self): walls = [box["bottom"][i] + box["top"][i] + box["front"][i] + box["back"][i] for i in range(self.velocity_set.d)] walls = np.unique(np.array(walls), axis=-1).tolist() - # Load the mesh - stl_filename = "examples/cfd/stl-files/DrivAer-Notchback.stl" + # Load the mesh (replace with your own mesh) + stl_filename = "../stl-files/DrivAer-Notchback.stl" mesh = trimesh.load_mesh(stl_filename, process=False) mesh_vertices = mesh.vertices diff --git a/requirements.txt b/requirements.txt index ee107af..4d0cd2c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,10 @@ -jax==0.4.20 -jaxlib==0.4.20 -matplotlib==3.8.0 -numpy==1.26.1 -pyvista==0.43.4 -Rtree==1.0.1 -trimesh==4.4.1 -orbax-checkpoint==0.4.1 -termcolor==2.3.0 -PhantomGaze @ git+https://github.com/loliverhennigh/PhantomGaze.git -tqdm==4.66.2 -warp-lang==1.0.2 -numpy-stl==3.1.1 -pydantic==2.7.0 -ruff==0.5.6 \ No newline at end of file +jax[cuda] +matplotlib +numpy +pyvista +Rtree +trimesh +warp-lang +numpy-stl +pydantic +ruff \ No newline at end of file diff --git a/tests/boundary_conditions/bc_equilibrium/test_bc_equilibrium_warp.py b/tests/boundary_conditions/bc_equilibrium/test_bc_equilibrium_warp.py index 5eb0c10..6bd9311 100644 --- a/tests/boundary_conditions/bc_equilibrium/test_bc_equilibrium_warp.py +++ b/tests/boundary_conditions/bc_equilibrium/test_bc_equilibrium_warp.py @@ -72,7 +72,7 @@ def test_bc_equilibrium_warp(dim, velocity_set, grid_shape): f = f.numpy() f_post = f_post.numpy() - assert f.shape == (velocity_set.q,) + grid_shape + assert f.shape == (velocity_set.q,) + grid_shape if dim == 3 else (velocity_set.q, grid_shape[0], grid_shape[1], 1) # Assert that the values are correct in the indices of the sphere weights = velocity_set.w diff --git a/tests/boundary_conditions/bc_fullway_bounce_back/test_bc_fullway_bounce_back_warp.py b/tests/boundary_conditions/bc_fullway_bounce_back/test_bc_fullway_bounce_back_warp.py index 10b9244..59c6c9d 100644 --- a/tests/boundary_conditions/bc_fullway_bounce_back/test_bc_fullway_bounce_back_warp.py +++ b/tests/boundary_conditions/bc_fullway_bounce_back/test_bc_fullway_bounce_back_warp.py @@ -58,7 +58,10 @@ def test_fullway_bounce_back_warp(dim, velocity_set, grid_shape): bc_mask, missing_mask = indices_boundary_masker([fullway_bc], bc_mask, missing_mask, start_index=None) # Generate a random field with the same shape - random_field = np.random.rand(velocity_set.q, *grid_shape).astype(np.float32) + if dim == 2: + random_field = np.random.rand(velocity_set.q, grid_shape[0], grid_shape[1], 1).astype(np.float32) + else: + random_field = np.random.rand(velocity_set.q, grid_shape[0], grid_shape[1], grid_shape[2]).astype(np.float32) # Add the random field to f_pre f_pre = wp.array(random_field) @@ -71,7 +74,7 @@ def test_fullway_bounce_back_warp(dim, velocity_set, grid_shape): f = f_pre.numpy() f_post = f_post.numpy() - assert f.shape == (velocity_set.q,) + grid_shape + assert f.shape == (velocity_set.q,) + grid_shape if dim == 3 else (velocity_set.q, grid_shape[0], grid_shape[1], 1) for i in range(velocity_set.q): np.allclose( diff --git a/tests/grids/test_grid_warp.py b/tests/grids/test_grid_warp.py index 782434d..61b27d4 100644 --- a/tests/grids/test_grid_warp.py +++ b/tests/grids/test_grid_warp.py @@ -22,8 +22,10 @@ def test_warp_grid_create_field(grid_size): init_xlb_env(xlb.velocity_set.D3Q19) my_grid = grid_factory(grid_shape) f = my_grid.create_field(cardinality=9, dtype=Precision.FP32) - - assert f.shape == (9,) + grid_shape, "Field shape is incorrect" + if len(grid_shape) == 2: + assert f.shape == (9,) + grid_shape + (1,), "Field shape is incorrect got {}".format(f.shape) + else: + assert f.shape == (9,) + grid_shape, "Field shape is incorrect got {}".format(f.shape) assert isinstance(f, wp.array), "Field should be a Warp ndarray" @@ -37,7 +39,6 @@ def test_warp_grid_create_field_fill_value(): assert isinstance(f, wp.array), "Field should be a Warp ndarray" f = f.numpy() - assert f.shape == (9,) + grid_shape, "Field shape is incorrect" assert np.allclose(f, fill_value), "Field not properly initialized with fill_value" diff --git a/tests/kernels/stream/test_stream_warp.py b/tests/kernels/stream/test_stream_warp.py index 0d100cf..95fcc05 100644 --- a/tests/kernels/stream/test_stream_warp.py +++ b/tests/kernels/stream/test_stream_warp.py @@ -61,7 +61,7 @@ def test_stream_operator_warp(dim, velocity_set, grid_shape): expected = jnp.stack(expected, axis=0) if dim == 2: - f_initial_warp = wp.array(f_initial) + f_initial_warp = wp.array(f_initial[..., np.newaxis]) elif dim == 3: f_initial_warp = wp.array(f_initial) @@ -71,7 +71,10 @@ def test_stream_operator_warp(dim, velocity_set, grid_shape): f_streamed = my_grid_warp.create_field(cardinality=velocity_set.q) f_streamed = stream_op(f_initial_warp, f_streamed) - assert jnp.allclose(f_streamed.numpy(), np.array(expected)), "Streaming did not occur as expected" + if len(grid_shape) == 2: + assert jnp.allclose(f_streamed.numpy()[..., 0], np.array(expected)), "Streaming did not occur as expected" + else: + assert jnp.allclose(f_streamed.numpy(), np.array(expected)), "Streaming did not occur as expected" if __name__ == "__main__": diff --git a/xlb/grid/warp_grid.py b/xlb/grid/warp_grid.py index 5018962..c74fc2f 100644 --- a/xlb/grid/warp_grid.py +++ b/xlb/grid/warp_grid.py @@ -21,7 +21,9 @@ def create_field( fill_value=None, ): dtype = dtype.wp_dtype if dtype else DefaultConfig.default_precision_policy.store_precision.wp_dtype - shape = (cardinality,) + (self.shape) + + # Check if shape is 2D, and if so, append a singleton dimension to the shape + shape = (cardinality,) + (self.shape if len(self.shape) != 2 else self.shape + (1,)) if fill_value is None: f = wp.zeros(shape, dtype=dtype) diff --git a/xlb/operator/boundary_condition/bc_do_nothing.py b/xlb/operator/boundary_condition/bc_do_nothing.py index 4b7ac90..56a332f 100644 --- a/xlb/operator/boundary_condition/bc_do_nothing.py +++ b/xlb/operator/boundary_condition/bc_do_nothing.py @@ -64,58 +64,7 @@ def functional( ): return f_pre - @wp.kernel - def kernel2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.uint8), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_2d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(DoNothingBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the result - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1]] = self.store_dtype(_f[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(DoNothingBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the result - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d + kernel = self._construct_kernel(functional) return functional, kernel @@ -127,4 +76,4 @@ def warp_implementation(self, f_pre, f_post, bc_mask, missing_mask): inputs=[f_pre, f_post, bc_mask, missing_mask], dim=f_pre.shape[1:], ) - return f_post + return f_post \ No newline at end of file diff --git a/xlb/operator/boundary_condition/bc_equilibrium.py b/xlb/operator/boundary_condition/bc_equilibrium.py index 4dd4b9e..77f408f 100644 --- a/xlb/operator/boundary_condition/bc_equilibrium.py +++ b/xlb/operator/boundary_condition/bc_equilibrium.py @@ -88,59 +88,8 @@ def functional( _f = self.equilibrium_operator.warp_functional(_rho, _u) return _f - # Construct the warp kernel - @wp.kernel - def kernel2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_2d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(EquilibriumBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the result - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1]] = self.store_dtype(_f[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(EquilibriumBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the result - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d + # Use the parent class's kernel and pass the functional + kernel = self._construct_kernel(functional) return functional, kernel @@ -152,4 +101,4 @@ def warp_implementation(self, f_pre, f_post, bc_mask, missing_mask): inputs=[f_pre, f_post, bc_mask, missing_mask], dim=f_pre.shape[1:], ) - return f_post + return f_post \ No newline at end of file diff --git a/xlb/operator/boundary_condition/bc_extrapolation_outflow.py b/xlb/operator/boundary_condition/bc_extrapolation_outflow.py index 53645c6..38657e5 100644 --- a/xlb/operator/boundary_condition/bc_extrapolation_outflow.py +++ b/xlb/operator/boundary_condition/bc_extrapolation_outflow.py @@ -140,15 +140,7 @@ def _construct_warp(self): _opp_indices = self.velocity_set.opp_indices @wp.func - def get_normal_vectors_2d( - missing_mask: Any, - ): - for l in range(_q): - if missing_mask[l] == wp.uint8(1) and wp.abs(_c[0, l]) + wp.abs(_c[1, l]) == 1: - return -wp.vec2i(_c[0, l], _c[1, l]) - - @wp.func - def get_normal_vectors_3d( + def get_normal_vectors( missing_mask: Any, ): for l in range(_q): @@ -175,34 +167,7 @@ def functional( return _f @wp.func - def prepare_bc_auxilary_data_2d( - index: Any, - timestep: Any, - missing_mask: Any, - f_0: Any, - f_1: Any, - f_pre: Any, - f_post: Any, - ): - # Preparing the formulation for this BC using the neighbour's populations stored in f_aux and - # f_pre (post-streaming values of the current voxel). We use directions that leave the domain - # for storing this prepared data. - _f = f_post - nv = get_normal_vectors_2d(missing_mask) - for l in range(self.velocity_set.q): - if missing_mask[l] == wp.uint8(1): - # f_0 is the post-collision values of the current time-step - # Get pull index associated with the "neighbours" pull_index - pull_index = type(index)() - for d in range(self.velocity_set.d): - pull_index[d] = index[d] - (_c[d, l] + nv[d]) - # The following is the post-streaming values of the neighbor cell - f_aux = self.compute_dtype(f_0[l, pull_index[0], pull_index[1]]) - _f[_opp_indices[l]] = (self.compute_dtype(1.0) - sound_speed) * f_pre[l] + sound_speed * f_aux - return _f - - @wp.func - def prepare_bc_auxilary_data_3d( + def prepare_bc_auxilary_data( index: Any, timestep: Any, missing_mask: Any, @@ -215,7 +180,7 @@ def prepare_bc_auxilary_data_3d( # f_pre (post-streaming values of the current voxel). We use directions that leave the domain # for storing this prepared data. _f = f_post - nv = get_normal_vectors_3d(missing_mask) + nv = get_normal_vectors(missing_mask) for l in range(self.velocity_set.q): if missing_mask[l] == wp.uint8(1): # f_0 is the post-collision values of the current time-step @@ -228,73 +193,7 @@ def prepare_bc_auxilary_data_3d( _f[_opp_indices[l]] = (self.compute_dtype(1.0) - sound_speed) * f_pre[l] + sound_speed * f_aux return _f - # Construct the warp kernel - @wp.kernel - def kernel2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - timestep = 0 - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_2d(f_pre, f_post, bc_mask, missing_mask, index) - - # special preparation of auxiliary data - if _boundary_id == wp.uint8(ExtrapolationOutflowBC.id): - _f_pre = prepare_bc_auxilary_data_2d(index, timestep, missing_mask, f_pre, f_post, f_pre, f_post) - - # Apply the boundary condition - if _boundary_id == wp.uint8(ExtrapolationOutflowBC.id): - # TODO: is there any way for this BC to have a meaningful kernel given that it has two steps after both - # collision and streaming? - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1]] = self.store_dtype(_f[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - timestep = 0 - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - _f_aux = _f_vec() - - # special preparation of auxiliary data - if _boundary_id == wp.uint8(ExtrapolationOutflowBC.id): - _f_pre = prepare_bc_auxilary_data_3d(index, timestep, missing_mask, f_pre, f_post, f_pre, f_post) - - # Apply the boundary condition - if _boundary_id == wp.uint8(ExtrapolationOutflowBC.id): - # TODO: is there any way for this BC to have a meaninful kernel given that it has two steps after both - # collision and streaming? - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - prepare_bc_auxilary_data = prepare_bc_auxilary_data_3d if self.velocity_set.d == 3 else prepare_bc_auxilary_data_2d + kernel = self._construct_kernel(functional) return (functional, prepare_bc_auxilary_data), kernel @@ -306,4 +205,4 @@ def warp_implementation(self, f_pre, f_post, bc_mask, missing_mask): inputs=[f_pre, f_post, bc_mask, missing_mask], dim=f_pre.shape[1:], ) - return f_post + return f_post \ No newline at end of file diff --git a/xlb/operator/boundary_condition/bc_fullway_bounce_back.py b/xlb/operator/boundary_condition/bc_fullway_bounce_back.py index 8569e84..19a3013 100644 --- a/xlb/operator/boundary_condition/bc_fullway_bounce_back.py +++ b/xlb/operator/boundary_condition/bc_fullway_bounce_back.py @@ -74,57 +74,7 @@ def functional( fliped_f[l] = f_pre[_opp_indices[l]] return fliped_f - @wp.kernel - def kernel2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - ): # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_2d(f_pre, f_post, bc_mask, missing_mask, index) - - # Check if the boundary is active - if _boundary_id == wp.uint8(FullwayBounceBackBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the result to the output - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1]] = self.store_dtype(_f[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - - # Check if the boundary is active - if _boundary_id == wp.uint8(FullwayBounceBackBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the result to the output - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d + kernel = self._construct_kernel(functional) return functional, kernel @@ -136,4 +86,4 @@ def warp_implementation(self, f_pre, f_post, bc_mask, missing_mask): inputs=[f_pre, f_post, bc_mask, missing_mask], dim=f_pre.shape[1:], ) - return f_post + return f_post \ No newline at end of file diff --git a/xlb/operator/boundary_condition/bc_grads_approximation.py b/xlb/operator/boundary_condition/bc_grads_approximation.py index bc29851..94ddba3 100644 --- a/xlb/operator/boundary_condition/bc_grads_approximation.py +++ b/xlb/operator/boundary_condition/bc_grads_approximation.py @@ -307,37 +307,10 @@ def functional_method2( f_post = grads_approximate_fpop(missing_mask, rho_target, u_target, f_post) return f_post - # Construct the warp kernel - @wp.kernel - def kernel( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - timestep = 0 - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - _f_aux = _f_vec() - - # Apply the boundary condition - if _boundary_id == wp.uint8(GradsApproximationBC.id): - # TODO: is there any way for this BC to have a meaninful kernel given that it has two steps after both - # collision and streaming? - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - functional = functional_method1 + kernel = self._construct_kernel(functional) + return functional, kernel @Operator.register_backend(ComputeBackend.WARP) @@ -348,4 +321,4 @@ def warp_implementation(self, f_pre, f_post, bc_mask, missing_mask): inputs=[f_pre, f_post, bc_mask, missing_mask], dim=f_pre.shape[1:], ) - return f_post + return f_post \ No newline at end of file diff --git a/xlb/operator/boundary_condition/bc_halfway_bounce_back.py b/xlb/operator/boundary_condition/bc_halfway_bounce_back.py index e8df6b7..bf04af0 100644 --- a/xlb/operator/boundary_condition/bc_halfway_bounce_back.py +++ b/xlb/operator/boundary_condition/bc_halfway_bounce_back.py @@ -87,59 +87,7 @@ def functional( return _f - # Construct the warp kernel - @wp.kernel - def kernel2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_2d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(HalfwayBounceBackBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1]] = self.store_dtype(_f[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(HalfwayBounceBackBC.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d + kernel = self._construct_kernel(functional) return functional, kernel diff --git a/xlb/operator/boundary_condition/bc_regularized.py b/xlb/operator/boundary_condition/bc_regularized.py index 065a0b0..af4c783 100644 --- a/xlb/operator/boundary_condition/bc_regularized.py +++ b/xlb/operator/boundary_condition/bc_regularized.py @@ -159,15 +159,7 @@ def _get_fsum( return fsum_known + fsum_middle @wp.func - def get_normal_vectors_2d( - missing_mask: Any, - ): - for l in range(_q): - if missing_mask[l] == wp.uint8(1) and wp.abs(_c[0, l]) + wp.abs(_c[1, l]) == 1: - return -_u_vec(_c_float[0, l], _c_float[1, l]) - - @wp.func - def get_normal_vectors_3d( + def get_normal_vectors( missing_mask: Any, ): for l in range(_q): @@ -211,7 +203,7 @@ def regularize_fpop( return fpop @wp.func - def functional3d_velocity( + def functional_velocity( index: Any, timestep: Any, missing_mask: Any, @@ -224,7 +216,7 @@ def functional3d_velocity( _f = f_post # Find normal vector - normals = get_normal_vectors_3d(missing_mask) + normals = get_normal_vectors(missing_mask) # calculate rho fsum = _get_fsum(_f, missing_mask) @@ -242,7 +234,7 @@ def functional3d_velocity( return _f @wp.func - def functional3d_pressure( + def functional_pressure( index: Any, timestep: Any, missing_mask: Any, @@ -255,7 +247,7 @@ def functional3d_pressure( _f = f_post # Find normal vector - normals = get_normal_vectors_3d(missing_mask) + normals = get_normal_vectors(missing_mask) # calculate velocity fsum = _get_fsum(_f, missing_mask) @@ -270,127 +262,11 @@ def functional3d_pressure( _f = regularize_fpop(_f, feq) return _f - @wp.func - def functional2d_velocity( - index: Any, - timestep: Any, - missing_mask: Any, - f_0: Any, - f_1: Any, - f_pre: Any, - f_post: Any, - ): - # Post-streaming values are only modified at missing direction - _f = f_post - - # Find normal vector - normals = get_normal_vectors_2d(missing_mask) - - # calculate rho - fsum = _get_fsum(_f, missing_mask) - unormal = self.compute_dtype(0.0) - for d in range(_d): - unormal += _u[d] * normals[d] - _rho = fsum / (self.compute_dtype(1.0) + unormal) - - # impose non-equilibrium bounceback - feq = self.equilibrium_operator.warp_functional(_rho, _u) - _f = bounceback_nonequilibrium(_f, feq, missing_mask) - - # Regularize the boundary fpop - _f = regularize_fpop(_f, feq) - return _f - - @wp.func - def functional2d_pressure( - index: Any, - timestep: Any, - missing_mask: Any, - f_0: Any, - f_1: Any, - f_pre: Any, - f_post: Any, - ): - # Post-streaming values are only modified at missing direction - _f = f_post - - # Find normal vector - normals = get_normal_vectors_2d(missing_mask) - - # calculate velocity - fsum = _get_fsum(_f, missing_mask) - unormal = -self.compute_dtype(1.0) + fsum / _rho - _u = unormal * normals - - # impose non-equilibrium bounceback - feq = self.equilibrium_operator.warp_functional(_rho, _u) - _f = bounceback_nonequilibrium(_f, feq, missing_mask) - - # Regularize the boundary fpop - _f = regularize_fpop(_f, feq) - return _f - - # Construct the warp kernel - @wp.kernel - def kernel2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_2d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(self.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1]] = self.store_dtype(_f[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(self.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - if self.velocity_set.d == 3 and self.bc_type == "velocity": - functional = functional3d_velocity - elif self.velocity_set.d == 3 and self.bc_type == "pressure": - functional = functional3d_pressure - elif self.bc_type == "velocity": - functional = functional2d_velocity - else: - functional = functional2d_pressure + if self.bc_type == "velocity": + functional = functional_velocity + elif self.bc_type == "pressure": + functional = functional_pressure + kernel = self._construct_kernel(functional) return functional, kernel diff --git a/xlb/operator/boundary_condition/bc_zouhe.py b/xlb/operator/boundary_condition/bc_zouhe.py index 4be2cf2..a92d909 100644 --- a/xlb/operator/boundary_condition/bc_zouhe.py +++ b/xlb/operator/boundary_condition/bc_zouhe.py @@ -189,15 +189,6 @@ def _construct_warp(self): _c_float = self.velocity_set.c_float # TODO: this is way less than ideal. we should not be making new types - @wp.func - def get_normal_vectors_2d( - lattice_direction: Any, - ): - l = lattice_direction - if wp.abs(_c[0, l]) + wp.abs(_c[1, l]) == 1: - normals = -_u_vec(_c_float[0, l], _c_float[1, l]) - return normals - @wp.func def _get_fsum( fpop: Any, @@ -213,7 +204,7 @@ def _get_fsum( return fsum_known + fsum_middle @wp.func - def get_normal_vectors_3d( + def get_normal_vectors( missing_mask: Any, ): for l in range(_q): @@ -232,7 +223,7 @@ def bounceback_nonequilibrium( return fpop @wp.func - def functional3d_velocity( + def functional_velocity( index: Any, timestep: Any, missing_mask: Any, @@ -245,7 +236,7 @@ def functional3d_velocity( _f = f_post # Find normal vector - normals = get_normal_vectors_3d(missing_mask) + normals = get_normal_vectors(missing_mask) # calculate rho fsum = _get_fsum(_f, missing_mask) @@ -260,7 +251,7 @@ def functional3d_velocity( return _f @wp.func - def functional3d_pressure( + def functional_pressure( index: Any, timestep: Any, missing_mask: Any, @@ -273,7 +264,7 @@ def functional3d_pressure( _f = f_post # Find normal vector - normals = get_normal_vectors_3d(missing_mask) + normals = get_normal_vectors(missing_mask) # calculate velocity fsum = _get_fsum(_f, missing_mask) @@ -285,121 +276,14 @@ def functional3d_pressure( _f = bounceback_nonequilibrium(_f, feq, missing_mask) return _f - @wp.func - def functional2d_velocity( - index: Any, - timestep: Any, - missing_mask: Any, - f_0: Any, - f_1: Any, - f_pre: Any, - f_post: Any, - ): - # Post-streaming values are only modified at missing direction - _f = f_post - - # Find normal vector - normals = get_normal_vectors_2d(missing_mask) - - # calculate rho - fsum = _get_fsum(_f, missing_mask) - unormal = self.compute_dtype(0.0) - for d in range(_d): - unormal += _u[d] * normals[d] - _rho = fsum / (self.compute_dtype(1.0) + unormal) - - # impose non-equilibrium bounceback - feq = self.equilibrium_operator.warp_functional(_rho, _u) - _f = bounceback_nonequilibrium(_f, feq, missing_mask) - return _f - - @wp.func - def functional2d_pressure( - index: Any, - timestep: Any, - missing_mask: Any, - f_0: Any, - f_1: Any, - f_pre: Any, - f_post: Any, - ): - # Post-streaming values are only modified at missing direction - _f = f_post - - # Find normal vector - normals = get_normal_vectors_2d(missing_mask) - - # calculate velocity - fsum = _get_fsum(_f, missing_mask) - unormal = -self.compute_dtype(1.0) + fsum / _rho - _u = unormal * normals - - # impose non-equilibrium bounceback - feq = self.equilibrium_operator.warp_functional(_rho, _u) - _f = bounceback_nonequilibrium(_f, feq, missing_mask) - return _f - - # Construct the warp kernel - @wp.kernel - def kernel2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_2d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(self.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1]] = self.store_dtype(_f[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( - f_pre: wp.array4d(dtype=Any), - f_post: wp.array4d(dtype=Any), - bc_mask: wp.array4d(dtype=wp.uint8), - missing_mask: wp.array4d(dtype=wp.bool), - ): - # Get the global index - i, j, k = wp.tid() - index = wp.vec3i(i, j, k) - - # read tid data - _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data_3d(f_pre, f_post, bc_mask, missing_mask, index) - - # Apply the boundary condition - if _boundary_id == wp.uint8(self.id): - timestep = 0 - _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) - else: - _f = _f_post - - # Write the distribution function - for l in range(self.velocity_set.q): - f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - if self.velocity_set.d == 3 and self.bc_type == "velocity": - functional = functional3d_velocity - elif self.velocity_set.d == 3 and self.bc_type == "pressure": - functional = functional3d_pressure + if self.bc_type == "velocity": + functional = functional_velocity + elif self.bc_type == "pressure": + functional = functional_pressure elif self.bc_type == "velocity": - functional = functional2d_velocity - else: - functional = functional2d_pressure + functional = functional_pressure + + kernel = self._construct_kernel(functional) return functional, kernel diff --git a/xlb/operator/boundary_condition/boundary_condition.py b/xlb/operator/boundary_condition/boundary_condition.py index 008cc9a..bf1eef2 100644 --- a/xlb/operator/boundary_condition/boundary_condition.py +++ b/xlb/operator/boundary_condition/boundary_condition.py @@ -75,32 +75,7 @@ def prepare_bc_auxilary_data( return f_post @wp.func - def _get_thread_data_2d( - f_pre: wp.array3d(dtype=Any), - f_post: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - index: wp.vec2i, - ): - # Get the boundary id and missing mask - _f_pre = _f_vec() - _f_post = _f_vec() - _boundary_id = bc_mask[0, index[0], index[1]] - _missing_mask = _missing_mask_vec() - for l in range(self.velocity_set.q): - # q-sized vector of populations - _f_pre[l] = self.compute_dtype(f_pre[l, index[0], index[1]]) - _f_post[l] = self.compute_dtype(f_post[l, index[0], index[1]]) - - # TODO fix vec bool - if missing_mask[l, index[0], index[1]]: - _missing_mask[l] = wp.uint8(1) - else: - _missing_mask[l] = wp.uint8(0) - return _f_pre, _f_post, _boundary_id, _missing_mask - - @wp.func - def _get_thread_data_3d( + def _get_thread_data( f_pre: wp.array4d(dtype=Any), f_post: wp.array4d(dtype=Any), bc_mask: wp.array4d(dtype=wp.uint8), @@ -126,8 +101,7 @@ def _get_thread_data_3d( # Construct some helper warp functions for getting tid data if self.compute_backend == ComputeBackend.WARP: - self._get_thread_data_2d = _get_thread_data_2d - self._get_thread_data_3d = _get_thread_data_3d + self._get_thread_data = _get_thread_data self.prepare_bc_auxilary_data = prepare_bc_auxilary_data @partial(jit, static_argnums=(0,), inline=True) @@ -137,3 +111,38 @@ def prepare_bc_auxilary_data(self, f_pre, f_post, bc_mask, missing_mask): currently being called after collision only. """ return f_post + + def _construct_kernel(self, functional): + """ + Constructs the warp kernel for the boundary condition. + The functional is specific to each boundary condition and should be passed as an argument. + """ + _id = wp.uint8(self.id) + + # Construct the warp kernel + @wp.kernel + def kernel( + f_pre: wp.array4d(dtype=Any), + f_post: wp.array4d(dtype=Any), + bc_mask: wp.array4d(dtype=wp.uint8), + missing_mask: wp.array4d(dtype=wp.bool), + ): + # Get the global index + i, j, k = wp.tid() + index = wp.vec3i(i, j, k) + + # read tid data + _f_pre, _f_post, _boundary_id, _missing_mask = self._get_thread_data(f_pre, f_post, bc_mask, missing_mask, index) + + # Apply the boundary condition + if _boundary_id == _id: + timestep = 0 + _f = functional(index, timestep, _missing_mask, f_pre, f_post, _f_pre, _f_post) + else: + _f = _f_post + + # Write the result + for l in range(self.velocity_set.q): + f_post[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) + + return kernel diff --git a/xlb/operator/boundary_masker/indices_boundary_masker.py b/xlb/operator/boundary_masker/indices_boundary_masker.py index e7e50d7..0c1f7e1 100644 --- a/xlb/operator/boundary_masker/indices_boundary_masker.py +++ b/xlb/operator/boundary_masker/indices_boundary_masker.py @@ -31,18 +31,12 @@ def are_indices_in_interior(self, indices, shape): Check if each 2D or 3D index is inside the bounds of the domain with the given shape and not at its boundary. - :param indices: List of tuples, where each tuple contains indices for each dimension. + :param indices: Array of indices, where each column contains indices for each dimension. :param shape: Tuple representing the shape of the domain (nx, ny) for 2D or (nx, ny, nz) for 3D. - :return: List of boolean flags where each flag indicates whether the corresponding index is inside the bounds. + :return: Array of boolean flags where each flag indicates whether the corresponding index is inside the bounds. """ - # Ensure that the number of dimensions in indices matches the domain shape - dim = len(shape) - if len(indices) != dim: - raise ValueError(f"Indices tuple must have {dim} dimensions to match the domain shape.") - - # Check each index tuple and return a list of boolean flags - flags = [all(0 < idx[d] < shape[d] - 1 for d in range(dim)) for idx in np.array(indices).T] - return flags + shape_array = np.array(shape) + return np.all((indices > 0) & (indices < shape_array[:, np.newaxis] - 1), axis=0) @Operator.register_backend(ComputeBackend.JAX) # TODO HS: figure out why uncommenting the line below fails unlike other operators! @@ -70,11 +64,12 @@ def jax_implementation(self, bclist, bc_mask, missing_mask, start_index=None): assert bc.indices is not None, f"Please specify indices associated with the {bc.__class__.__name__} BC!" assert bc.mesh_vertices is None, f"Please use MeshBoundaryMasker operator if {bc.__class__.__name__} is imposed on a mesh (e.g. STL)!" id_number = bc.id - local_indices = np.array(bc.indices) - np.array(start_index)[:, np.newaxis] + bc_indices = np.array(bc.indices) + local_indices = bc_indices - np.array(start_index)[:, np.newaxis] padded_indices = local_indices + np.array(shift_tup)[:, np.newaxis] bmap = bmap.at[tuple(padded_indices)].set(id_number) - if any(self.are_indices_in_interior(bc.indices, domain_shape)) and bc.needs_padding: - # checking if all indices associated with this BC are in the interior of the domain (not at the boundary). + if any(self.are_indices_in_interior(bc_indices, domain_shape)) and bc.needs_padding: + # checking if all indices associated with this BC are in the interior of the domain. # This flag is needed e.g. if the no-slip geometry is anywhere but at the boundaries of the computational domain. if dim == 2: grid_mask = grid_mask.at[:, padded_indices[0], padded_indices[1]].set(True) @@ -108,59 +103,9 @@ def check_index_bounds(index: wp.vec3i, shape: wp.vec3i): is_in_bounds = index[0] >= 0 and index[0] < shape[0] and index[1] >= 0 and index[1] < shape[1] and index[2] >= 0 and index[2] < shape[2] return is_in_bounds - # Construct the warp 2D kernel - @wp.kernel - def kernel2d( - indices: wp.array2d(dtype=wp.int32), - id_number: wp.array1d(dtype=wp.uint8), - is_interior: wp.array1d(dtype=wp.bool), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - start_index: wp.vec2i, - ): - # Get the index of indices - ii = wp.tid() - - # Get local indices - index = wp.vec2i() - index[0] = indices[0, ii] - start_index[0] - index[1] = indices[1, ii] - start_index[1] - - # Check if index is in bounds - if index[0] >= 0 and index[0] < missing_mask.shape[1] and index[1] >= 0 and index[1] < missing_mask.shape[2]: - # Stream indices - for l in range(_q): - # Get the index of the streaming direction - pull_index = wp.vec2i() - push_index = wp.vec2i() - for d in range(self.velocity_set.d): - pull_index[d] = index[d] - _c[d, l] - push_index[d] = index[d] + _c[d, l] - - # set bc_mask for all bc indices - bc_mask[0, index[0], index[1]] = id_number[ii] - - # check if pull index is out of bound - # These directions will have missing information after streaming - if pull_index[0] < 0 or pull_index[0] >= missing_mask.shape[1] or pull_index[1] < 0 or pull_index[1] >= missing_mask.shape[2]: - # Set the missing mask - missing_mask[l, index[0], index[1]] = True - - # handling geometries in the interior of the computational domain - elif ( - is_interior[ii] - and push_index[0] >= 0 - and push_index[0] < missing_mask.shape[1] - and push_index[1] >= 0 - and push_index[1] < missing_mask.shape[2] - ): - # Set the missing mask - missing_mask[l, push_index[0], push_index[1]] = True - bc_mask[0, push_index[0], push_index[1]] = id_number[ii] - # Construct the warp 3D kernel @wp.kernel - def kernel3d( + def kernel( indices: wp.array2d(dtype=wp.int32), id_number: wp.array1d(dtype=wp.uint8), is_interior: wp.array1d(dtype=wp.bool), @@ -204,47 +149,72 @@ def kernel3d( missing_mask[l, push_index[0], push_index[1], push_index[2]] = True bc_mask[0, push_index[0], push_index[1], push_index[2]] = id_number[ii] - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return None, kernel @Operator.register_backend(ComputeBackend.WARP) def warp_implementation(self, bclist, bc_mask, missing_mask, start_index=None): - dim = self.velocity_set.d - index_list = [[] for _ in range(dim)] - id_list = [] - is_interior = [] + # Pre-allocate arrays with maximum possible size + max_size = sum(len(bc.indices[0]) if isinstance(bc.indices, list) else bc.indices.shape[1] for bc in bclist if bc.indices is not None) + indices = np.zeros((3, max_size), dtype=np.int32) + id_numbers = np.zeros(max_size, dtype=np.uint8) + is_interior = np.zeros(max_size, dtype=bool) + + current_index = 0 for bc in bclist: assert bc.indices is not None, f'Please specify indices associated with the {bc.__class__.__name__} BC using keyword "indices"!' assert bc.mesh_vertices is None, f"Please use MeshBoundaryMasker operator if {bc.__class__.__name__} is imposed on a mesh (e.g. STL)!" - for d in range(dim): - index_list[d] += bc.indices[d] - id_list += [bc.id] * len(bc.indices[0]) - is_interior += self.are_indices_in_interior(bc.indices, bc_mask[0].shape) if bc.needs_padding else [False] * len(bc.indices[0]) - # We are done with bc.indices. Remove them from BC objects + bc_indices = np.asarray(bc.indices) + num_indices = bc_indices.shape[1] + + # Ensure indices are 3D + if bc_indices.shape[0] == 2: + bc_indices = np.vstack([bc_indices, np.zeros(num_indices, dtype=int)]) + + # Add indices to the pre-allocated array + indices[:, current_index : current_index + num_indices] = bc_indices + + # Set id numbers + id_numbers[current_index : current_index + num_indices] = bc.id + + # Set is_interior flags + if bc.needs_padding: + is_interior[current_index : current_index + num_indices] = self.are_indices_in_interior(bc_indices, bc_mask[0].shape) + else: + is_interior[current_index : current_index + num_indices] = False + + current_index += num_indices + + # Remove indices from BC objects bc.__dict__.pop("indices", None) - # convert to warp arrays - indices = wp.array2d(index_list, dtype=wp.int32) - id_number = wp.array1d(id_list, dtype=wp.uint8) - is_interior = wp.array1d(is_interior, dtype=wp.bool) + # Trim arrays to actual size + indices = indices[:, :current_index] + id_numbers = id_numbers[:current_index] + is_interior = is_interior[:current_index] + + # Convert to Warp arrays + wp_indices = wp.array(indices, dtype=wp.int32) + wp_id_numbers = wp.array(id_numbers, dtype=wp.uint8) + wp_is_interior = wp.array(is_interior, dtype=wp.bool) if start_index is None: - start_index = (0,) * dim + start_index = wp.vec3i(0, 0, 0) + else: + start_index = wp.vec3i(*start_index) # Launch the warp kernel wp.launch( self.warp_kernel, + dim=current_index, inputs=[ - indices, - id_number, - is_interior, + wp_indices, + wp_id_numbers, + wp_is_interior, bc_mask, missing_mask, start_index, ], - dim=indices.shape[1], ) return bc_mask, missing_mask diff --git a/xlb/operator/collision/bgk.py b/xlb/operator/collision/bgk.py index 60f63ef..115ed9a 100644 --- a/xlb/operator/collision/bgk.py +++ b/xlb/operator/collision/bgk.py @@ -36,34 +36,7 @@ def functional(f: Any, feq: Any, rho: Any, u: Any): # Construct the warp kernel @wp.kernel - def kernel2d( - f: wp.array3d(dtype=Any), - feq: wp.array3d(dtype=Any), - fout: wp.array3d(dtype=Any), - rho: wp.array3d(dtype=Any), - u: wp.array3d(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # Load needed values - _f = _f_vec() - _feq = _f_vec() - for l in range(self.velocity_set.q): - _f[l] = f[l, index[0], index[1]] - _feq[l] = feq[l, index[0], index[1]] - - # Compute the collision - _fout = functional(_f, _feq, rho, u) - - # Write the result - for l in range(self.velocity_set.q): - fout[l, index[0], index[1]] = self.store_dtype(_fout[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( + def kernel( f: wp.array4d(dtype=Any), feq: wp.array4d(dtype=Any), fout: wp.array4d(dtype=Any), @@ -88,8 +61,6 @@ def kernel3d( for l in range(self.velocity_set.q): fout[l, index[0], index[1], index[2]] = self.store_dtype(_fout[l]) - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/collision/forced_collision.py b/xlb/operator/collision/forced_collision.py index 31ef392..2036bab 100644 --- a/xlb/operator/collision/forced_collision.py +++ b/xlb/operator/collision/forced_collision.py @@ -52,39 +52,7 @@ def functional(f: Any, feq: Any, rho: Any, u: Any): # Construct the warp kernel @wp.kernel - def kernel2d( - f: wp.array3d(dtype=Any), - feq: wp.array3d(dtype=Any), - fout: wp.array3d(dtype=Any), - rho: wp.array3d(dtype=Any), - u: wp.array3d(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) # TODO: Warp needs to fix this - - # Load needed values - _f = _f_vec() - _feq = _f_vec() - _d = self.velocity_set.d - for l in range(self.velocity_set.q): - _f[l] = f[l, index[0], index[1]] - _feq[l] = feq[l, index[0], index[1]] - _u = _u_vec() - for l in range(_d): - _u[l] = u[l, index[0], index[1]] - _rho = rho[0, index[0], index[1]] - - # Compute the collision - _fout = functional(_f, _feq, _rho, _u) - - # Write the result - for l in range(self.velocity_set.q): - fout[l, index[0], index[1]] = _fout[l] - - # Construct the warp kernel - @wp.kernel - def kernel3d( + def kernel( f: wp.array4d(dtype=Any), feq: wp.array4d(dtype=Any), fout: wp.array4d(dtype=Any), @@ -114,8 +82,6 @@ def kernel3d( for l in range(self.velocity_set.q): fout[l, index[0], index[1], index[2]] = _fout[l] - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/collision/kbc.py b/xlb/operator/collision/kbc.py index bbc5d15..5c5c29e 100644 --- a/xlb/operator/collision/kbc.py +++ b/xlb/operator/collision/kbc.py @@ -263,30 +263,7 @@ def entropic_scalar_product( # Construct the functional @wp.func - def functional2d( - f: Any, - feq: Any, - rho: Any, - u: Any, - ): - # Compute shear and delta_s - fneq = f - feq - shear = decompose_shear_d2q9(fneq) - delta_s = shear * rho # TODO: Check this - - # Perform collision - delta_h = fneq - delta_s - two = self.compute_dtype(2.0) - gamma = _inv_beta - (two - _inv_beta) * entropic_scalar_product(delta_s, delta_h, feq) / ( - _epsilon + entropic_scalar_product(delta_h, delta_h, feq) - ) - fout = f - _beta * (two * delta_s + gamma * delta_h) - - return fout - - # Construct the functional - @wp.func - def functional3d( + def functional( f: Any, feq: Any, rho: Any, @@ -309,39 +286,7 @@ def functional3d( # Construct the warp kernel @wp.kernel - def kernel2d( - f: wp.array3d(dtype=Any), - feq: wp.array3d(dtype=Any), - fout: wp.array3d(dtype=Any), - rho: wp.array3d(dtype=Any), - u: wp.array3d(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) # TODO: Warp needs to fix this - - # Load needed values - _f = _f_vec() - _feq = _f_vec() - _d = self.velocity_set.d - for l in range(self.velocity_set.q): - _f[l] = f[l, index[0], index[1]] - _feq[l] = feq[l, index[0], index[1]] - _u = _u_vec() - for l in range(_d): - _u[l] = u[l, index[0], index[1]] - _rho = rho[0, index[0], index[1]] - - # Compute the collision - _fout = functional(_f, _feq, _rho, _u) - - # Write the result - for l in range(self.velocity_set.q): - fout[l, index[0], index[1]] = self.store_dtype(_fout[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( + def kernel( f: wp.array4d(dtype=Any), feq: wp.array4d(dtype=Any), fout: wp.array4d(dtype=Any), @@ -371,9 +316,6 @@ def kernel3d( for l in range(self.velocity_set.q): fout[l, index[0], index[1], index[2]] = self.store_dtype(_fout[l]) - functional = functional3d if self.velocity_set.d == 3 else functional2d - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/equilibrium/quadratic_equilibrium.py b/xlb/operator/equilibrium/quadratic_equilibrium.py index ba337f0..62cc041 100644 --- a/xlb/operator/equilibrium/quadratic_equilibrium.py +++ b/xlb/operator/equilibrium/quadratic_equilibrium.py @@ -61,7 +61,7 @@ def functional( # Construct the warp kernel @wp.kernel - def kernel3d( + def kernel( rho: wp.array4d(dtype=Any), u: wp.array4d(dtype=Any), f: wp.array4d(dtype=Any), @@ -81,29 +81,6 @@ def kernel3d( for l in range(self.velocity_set.q): f[l, index[0], index[1], index[2]] = self.store_dtype(feq[l]) - @wp.kernel - def kernel2d( - rho: wp.array3d(dtype=Any), - u: wp.array3d(dtype=Any), - f: wp.array3d(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # Get the equilibrium - _u = _u_vec() - for d in range(self.velocity_set.d): - _u[d] = u[d, index[0], index[1]] - _rho = rho[0, index[0], index[1]] - feq = functional(_rho, _u) - - # Set the output - for l in range(self.velocity_set.q): - f[l, index[0], index[1]] = self.store_dtype(feq[l]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/force/exact_difference_force.py b/xlb/operator/force/exact_difference_force.py index b4da602..ec1ef5b 100644 --- a/xlb/operator/force/exact_difference_force.py +++ b/xlb/operator/force/exact_difference_force.py @@ -86,33 +86,7 @@ def functional(f_postcollision: Any, feq: Any, rho: Any, u: Any): # Construct the warp kernel @wp.kernel - def kernel2d( - f_postcollision: Any, - feq: Any, - fout: wp.array3d(dtype=Any), - rho: wp.array3d(dtype=Any), - u: wp.array3d(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # Load needed values - _u = _u_vec() - for l in range(_d): - _u[l] = u[l, index[0], index[1]] - _rho = rho[0, index[0], index[1]] - - # Compute the collision - _fout = functional(f_postcollision, feq, _rho, _u) - - # Write the result - for l in range(self.velocity_set.q): - fout[l, index[0], index[1]] = self.store_dtype(_fout[l]) - - # Construct the warp kernel - @wp.kernel - def kernel3d( + def kernel( f_postcollision: Any, feq: Any, fout: wp.array4d(dtype=Any), @@ -136,7 +110,6 @@ def kernel3d( for l in range(self.velocity_set.q): fout[l, index[0], index[1], index[2]] = self.store_dtype(_fout[l]) - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/force/momentum_transfer.py b/xlb/operator/force/momentum_transfer.py index 8b0aacf..dbd5307 100644 --- a/xlb/operator/force/momentum_transfer.py +++ b/xlb/operator/force/momentum_transfer.py @@ -102,62 +102,7 @@ def _construct_warp(self): # Construct the warp kernel @wp.kernel - def kernel2d( - f_0: wp.array3d(dtype=Any), - f_1: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=wp.uint8), - missing_mask: wp.array3d(dtype=wp.bool), - force: wp.array(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # Get the boundary id - _boundary_id = bc_mask[0, index[0], index[1]] - _missing_mask = _missing_mask_vec() - for l in range(self.velocity_set.q): - # TODO fix vec bool - if missing_mask[l, index[0], index[1]]: - _missing_mask[l] = wp.uint8(1) - else: - _missing_mask[l] = wp.uint8(0) - - # Determin if boundary is an edge by checking if center is missing - is_edge = wp.bool(False) - if _boundary_id == wp.uint8(_no_slip_id): - if _missing_mask[_zero_index] == wp.uint8(0): - is_edge = wp.bool(True) - - # If the boundary is an edge then add the momentum transfer - m = _u_vec() - if is_edge: - # Get the distribution function - f_post_collision = _f_vec() - for l in range(self.velocity_set.q): - f_post_collision[l] = f_0[l, index[0], index[1]] - - # Apply streaming (pull method) - timestep = 0 - f_post_stream = self.stream.warp_functional(f_0, index) - f_post_stream = self.no_slip_bc_instance.warp_functional(index, timestep, _missing_mask, f_0, f_1, f_post_collision, f_post_stream) - - # Compute the momentum transfer - for d in range(self.velocity_set.d): - m[d] = self.compute_dtype(0.0) - for l in range(self.velocity_set.q): - if _missing_mask[l] == wp.uint8(1): - phi = f_post_collision[_opp_indices[l]] + f_post_stream[l] - if _c[d, _opp_indices[l]] == 1: - m[d] += phi - elif _c[d, _opp_indices[l]] == -1: - m[d] -= phi - - wp.atomic_add(force, 0, m) - - # Construct the warp kernel - @wp.kernel - def kernel3d( + def kernel( f_0: wp.array4d(dtype=Any), f_1: wp.array4d(dtype=Any), bc_mask: wp.array4d(dtype=wp.uint8), @@ -210,9 +155,6 @@ def kernel3d( wp.atomic_add(force, 0, m) - # Return the correct kernel - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return None, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/macroscopic/first_moment.py b/xlb/operator/macroscopic/first_moment.py index 329a71f..cb99a9f 100644 --- a/xlb/operator/macroscopic/first_moment.py +++ b/xlb/operator/macroscopic/first_moment.py @@ -38,7 +38,7 @@ def functional( return u @wp.kernel - def kernel3d( + def kernel( f: wp.array4d(dtype=Any), rho: wp.array4d(dtype=Any), u: wp.array4d(dtype=Any), @@ -55,26 +55,6 @@ def kernel3d( for d in range(self.velocity_set.d): u[d, index[0], index[1], index[2]] = self.store_dtype(_u[d]) - @wp.kernel - def kernel2d( - f: wp.array3d(dtype=Any), - rho: wp.array3d(dtype=Any), - u: wp.array3d(dtype=Any), - ): - i, j = wp.tid() - index = wp.vec2i(i, j) - - _f = _f_vec() - for l in range(self.velocity_set.q): - _f[l] = f[l, index[0], index[1]] - _rho = rho[0, index[0], index[1]] - _u = functional(_f, _rho) - - for d in range(self.velocity_set.d): - u[d, index[0], index[1]] = self.store_dtype(_u[d]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/macroscopic/macroscopic.py b/xlb/operator/macroscopic/macroscopic.py index b574436..ab1193b 100644 --- a/xlb/operator/macroscopic/macroscopic.py +++ b/xlb/operator/macroscopic/macroscopic.py @@ -37,7 +37,7 @@ def functional(f: _f_vec): return rho, u @wp.kernel - def kernel3d( + def kernel( f: wp.array4d(dtype=Any), rho: wp.array4d(dtype=Any), u: wp.array4d(dtype=Any), @@ -54,26 +54,6 @@ def kernel3d( for d in range(self.velocity_set.d): u[d, index[0], index[1], index[2]] = self.store_dtype(_u[d]) - @wp.kernel - def kernel2d( - f: wp.array3d(dtype=Any), - rho: wp.array3d(dtype=Any), - u: wp.array3d(dtype=Any), - ): - i, j = wp.tid() - index = wp.vec2i(i, j) - - _f = _f_vec() - for l in range(self.velocity_set.q): - _f[l] = f[l, index[0], index[1]] - _rho, _u = functional(_f) - - rho[0, index[0], index[1]] = self.store_dtype(_rho) - for d in range(self.velocity_set.d): - u[d, index[0], index[1]] = self.store_dtype(_u[d]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/macroscopic/second_moment.py b/xlb/operator/macroscopic/second_moment.py index 687b38a..6c7e70e 100644 --- a/xlb/operator/macroscopic/second_moment.py +++ b/xlb/operator/macroscopic/second_moment.py @@ -79,7 +79,7 @@ def functional( # Construct the kernel @wp.kernel - def kernel3d( + def kernel( f: wp.array4d(dtype=Any), pi: wp.array4d(dtype=Any), ): @@ -97,27 +97,6 @@ def kernel3d( for d in range(_pi_dim): pi[d, index[0], index[1], index[2]] = self.store_dtype(_pi[d]) - @wp.kernel - def kernel2d( - f: wp.array3d(dtype=Any), - pi: wp.array3d(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # Get the equilibrium - _f = _f_vec() - for l in range(self.velocity_set.q): - _f[l] = f[l, index[0], index[1]] - _pi = functional(_f) - - # Set the output - for d in range(_pi_dim): - pi[d, index[0], index[1]] = self.store_dtype(_pi[d]) - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/macroscopic/zero_moment.py b/xlb/operator/macroscopic/zero_moment.py index d0fbf51..8abb4de 100644 --- a/xlb/operator/macroscopic/zero_moment.py +++ b/xlb/operator/macroscopic/zero_moment.py @@ -27,7 +27,7 @@ def functional(f: _f_vec): return rho @wp.kernel - def kernel3d( + def kernel( f: wp.array4d(dtype=Any), rho: wp.array4d(dtype=Any), ): @@ -41,23 +41,6 @@ def kernel3d( rho[0, index[0], index[1], index[2]] = _rho - @wp.kernel - def kernel2d( - f: wp.array3d(dtype=Any), - rho: wp.array3d(dtype=Any), - ): - i, j = wp.tid() - index = wp.vec2i(i, j) - - _f = _f_vec() - for l in range(self.velocity_set.q): - _f[l] = f[l, index[0], index[1]] - _rho = functional(_f) - - rho[0, index[0], index[1]] = _rho - - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/stepper/nse_stepper.py b/xlb/operator/stepper/nse_stepper.py index 99431eb..e08e95c 100644 --- a/xlb/operator/stepper/nse_stepper.py +++ b/xlb/operator/stepper/nse_stepper.py @@ -140,27 +140,7 @@ def apply_bc( return f_result @wp.func - def get_thread_data_2d( - f0_buffer: wp.array3d(dtype=Any), - f1_buffer: wp.array3d(dtype=Any), - missing_mask: wp.array3d(dtype=Any), - index: Any, - ): - # Read thread data for populations and missing mask - _f0_thread = _f_vec() - _f1_thread = _f_vec() - _missing_mask = _missing_mask_vec() - for l in range(self.velocity_set.q): - _f0_thread[l] = self.compute_dtype(f0_buffer[l, index[0], index[1]]) - _f1_thread[l] = self.compute_dtype(f1_buffer[l, index[0], index[1]]) - if missing_mask[l, index[0], index[1]]: - _missing_mask[l] = wp.uint8(1) - else: - _missing_mask[l] = wp.uint8(0) - return _f0_thread, _f1_thread, _missing_mask - - @wp.func - def get_thread_data_3d( + def get_thread_data( f0_buffer: wp.array4d(dtype=Any), f1_buffer: wp.array4d(dtype=Any), missing_mask: wp.array4d(dtype=Any), @@ -182,47 +162,7 @@ def get_thread_data_3d( return _f0_thread, _f1_thread, _missing_mask @wp.kernel - def kernel2d( - f_0: wp.array3d(dtype=Any), - f_1: wp.array3d(dtype=Any), - bc_mask: wp.array3d(dtype=Any), - missing_mask: wp.array3d(dtype=Any), - timestep: int, - ): - i, j = wp.tid() - index = wp.vec2i(i, j) - - _boundary_id = bc_mask[0, index[0], index[1]] - if _boundary_id == wp.uint8(255): - return - - # Apply streaming - _f_post_stream = self.stream.warp_functional(f_0, index) - - _f0_thread, _f1_thread, _missing_mask = get_thread_data_2d(f_0, f_1, missing_mask, index) - _f_post_collision = _f0_thread - - # Apply post-streaming boundary conditions - _f_post_stream = apply_bc(index, timestep, _boundary_id, _missing_mask, f_0, f_1, _f_post_collision, _f_post_stream, True) - - # Compute rho and u - _rho, _u = self.macroscopic.warp_functional(_f_post_stream) - - # Compute equilibrium - _feq = self.equilibrium.warp_functional(_rho, _u) - - # Apply collision - _f_post_collision = self.collision.warp_functional(_f_post_stream, _feq, _rho, _u) - - # Apply post-collision boundary conditions - _f_post_collision = apply_bc(index, timestep, _boundary_id, _missing_mask, f_0, f_1, _f_post_stream, _f_post_collision, False) - - # Store the result in f_1 - for l in range(self.velocity_set.q): - f_1[l, index[0], index[1]] = self.store_dtype(_f_post_collision[l]) - - @wp.kernel - def kernel3d( + def kernel( f_0: wp.array4d(dtype=Any), f_1: wp.array4d(dtype=Any), bc_mask: wp.array4d(dtype=Any), @@ -239,7 +179,7 @@ def kernel3d( # Apply streaming _f_post_stream = self.stream.warp_functional(f_0, index) - _f0_thread, _f1_thread, _missing_mask = get_thread_data_3d(f_0, f_1, missing_mask, index) + _f0_thread, _f1_thread, _missing_mask = get_thread_data(f_0, f_1, missing_mask, index) _f_post_collision = _f0_thread # Apply post-streaming boundary conditions @@ -261,9 +201,6 @@ def kernel3d( f_0[_opp_indices[l], index[0], index[1], index[2]] = self.store_dtype(_f1_thread[_opp_indices[l]]) f_1[l, index[0], index[1], index[2]] = self.store_dtype(_f_post_collision[l]) - # Return the correct kernel - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return None, kernel @Operator.register_backend(ComputeBackend.WARP) diff --git a/xlb/operator/stream/stream.py b/xlb/operator/stream/stream.py index dc2417a..247fa5a 100644 --- a/xlb/operator/stream/stream.py +++ b/xlb/operator/stream/stream.py @@ -55,50 +55,9 @@ def _construct_warp(self): _c = self.velocity_set.c _f_vec = wp.vec(self.velocity_set.q, dtype=self.compute_dtype) - # Construct the warp functional - @wp.func - def functional2d( - f: wp.array3d(dtype=Any), - index: Any, - ): - # Pull the distribution function - _f = _f_vec() - for l in range(self.velocity_set.q): - # Get pull index - pull_index = type(index)() - for d in range(self.velocity_set.d): - pull_index[d] = index[d] - _c[d, l] - - # impose periodicity for out of bound values - if pull_index[d] < 0: - pull_index[d] = f.shape[d + 1] - 1 - elif pull_index[d] >= f.shape[d + 1]: - pull_index[d] = 0 - - # Read the distribution function - _f[l] = self.compute_dtype(f[l, pull_index[0], pull_index[1]]) - - return _f - - @wp.kernel - def kernel2d( - f_0: wp.array3d(dtype=Any), - f_1: wp.array3d(dtype=Any), - ): - # Get the global index - i, j = wp.tid() - index = wp.vec2i(i, j) - - # Set the output - _f = functional2d(f_0, index) - - # Write the output - for l in range(self.velocity_set.q): - f_1[l, index[0], index[1]] = self.store_dtype(_f[l]) - # Construct the funcional to get streamed indices @wp.func - def functional3d( + def functional( f: wp.array4d(dtype=Any), index: Any, ): @@ -124,7 +83,7 @@ def functional3d( # Construct the warp kernel @wp.kernel - def kernel3d( + def kernel( f_0: wp.array4d(dtype=Any), f_1: wp.array4d(dtype=Any), ): @@ -133,15 +92,12 @@ def kernel3d( index = wp.vec3i(i, j, k) # Set the output - _f = functional3d(f_0, index) + _f = functional(f_0, index) # Write the output for l in range(self.velocity_set.q): f_1[l, index[0], index[1], index[2]] = self.store_dtype(_f[l]) - functional = functional3d if self.velocity_set.d == 3 else functional2d - kernel = kernel3d if self.velocity_set.d == 3 else kernel2d - return functional, kernel @Operator.register_backend(ComputeBackend.WARP)