From 6021fd55ae0bdd650984966157083d59a6d00923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Schartum=20Dokken?= Date: Tue, 24 Sep 2024 10:59:09 +0200 Subject: [PATCH 1/7] Fix mesh constructor again (#3432) * Fix mesh constructor again * Ruff * Add trest --- python/dolfinx/mesh.py | 4 +++- python/test/unit/refinement/test_refinement.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/python/dolfinx/mesh.py b/python/dolfinx/mesh.py index 8ad6fe3c02d..28b9eb759ea 100644 --- a/python/dolfinx/mesh.py +++ b/python/dolfinx/mesh.py @@ -557,7 +557,9 @@ def refine( mesh1, parent_cell, parent_facet = _cpp.refinement.refine( mesh._cpp_object, edges, redistribute, ghost_mode, option ) - return Mesh(mesh1, mesh._ufl_domain), parent_cell, parent_facet + # Create new ufl domain as it will carry a reference to the C++ mesh in the ufl_cargo + ufl_domain = ufl.Mesh(mesh._ufl_domain.ufl_coordinate_element()) # type: ignore + return Mesh(mesh1, ufl_domain), parent_cell, parent_facet def create_mesh( diff --git a/python/test/unit/refinement/test_refinement.py b/python/test/unit/refinement/test_refinement.py index 440057ce265..6aaa4090dfb 100644 --- a/python/test/unit/refinement/test_refinement.py +++ b/python/test/unit/refinement/test_refinement.py @@ -1,5 +1,5 @@ -# Copyright (C) 2018-2021 Chris N Richardson and Jørgen S. Dokken -# +# Copyright (C) 2018-2024 Chris N Richardson and Jørgen S. Dokken + # This file is part of DOLFINx (https://www.fenicsproject.org) # # SPDX-License-Identifier: LGPL-3.0-or-later @@ -212,3 +212,10 @@ def test_refine_cell_meshtag(tdim, refine_plaza_wrapper): new_meshtag = transfer_meshtag(meshtag, fine_mesh, parent_cell) assert sum(new_meshtag.values) == (tdim * 4 - 4) * sum(meshtag.values) assert len(new_meshtag.indices) == (tdim * 4 - 4) * len(meshtag.indices) + + +def test_refine_ufl_cargo(): + mesh = create_unit_cube(MPI.COMM_WORLD, 4, 3, 3) + mesh.topology.create_entities(1) + refined_mesh, _, _ = refine(mesh, redistribute=False) + assert refined_mesh.ufl_domain().ufl_cargo() != mesh.ufl_domain().ufl_cargo() From 0da58d1ef4258652d35ae97689d8f6cd9e0e3951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Schartum=20Dokken?= Date: Tue, 24 Sep 2024 13:17:38 +0200 Subject: [PATCH 2/7] Remove NBX call from sub-IndexMap creation (#3392) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remove nbx call * Simplify (and verify) * Start the complicated process of sending new dest ranks after determining ownership * Communicate new dest ranks * Fix code (with verification) * Remove verification * Remove iostream * Revert loop we do not use * Add back linebreak * Remove duplicate sort * Fix source and dest confusion in alltoall * Remove nbx comparison * Fix typo * Update cpp/dolfinx/common/IndexMap.cpp Co-authored-by: Joe Dean * Update cpp/dolfinx/common/IndexMap.cpp Co-authored-by: Jørgen Schartum Dokken * Update cpp/dolfinx/common/IndexMap.cpp Co-authored-by: Jørgen Schartum Dokken * Update cpp/dolfinx/common/IndexMap.cpp Co-authored-by: Jørgen Schartum Dokken * Simplifications * Logic simplifications * Small edits --------- Co-authored-by: Garth N. 
Wells Co-authored-by: Joe Dean --- cpp/dolfinx/common/IndexMap.cpp | 149 +++++++++++++++++++++++++------- 1 file changed, 116 insertions(+), 33 deletions(-) diff --git a/cpp/dolfinx/common/IndexMap.cpp b/cpp/dolfinx/common/IndexMap.cpp index 180254c0422..dd3ae3a7145 100644 --- a/cpp/dolfinx/common/IndexMap.cpp +++ b/cpp/dolfinx/common/IndexMap.cpp @@ -1,4 +1,5 @@ -// Copyright (C) 2015-2022 Chris Richardson, Garth N. Wells and Igor Baratta +// Copyright (C) 2015-2024 Chris Richardson, Garth N. Wells, Igor Baratta, +// Joseph P. Dean and Jørgen S. Dokken // // This file is part of DOLFINx (https://www.fenicsproject.org) // @@ -98,8 +99,7 @@ communicate_ghosts_to_owners(MPI_Comm comm, std::span src, { auto it = std::ranges::lower_bound(src, owners[i]); assert(it != src.end() and *it == owners[i]); - std::size_t r = std::distance(src.begin(), it); - if (include_ghost[i]) + if (std::size_t r = std::distance(src.begin(), it); include_ghost[i]) { send_data[r].push_back(ghosts[i]); pos_to_ghost[r].push_back(i); @@ -191,16 +191,20 @@ compute_submap_indices(const IndexMap& imap, std::span(is_in_submap.cbegin() + imap.size_local(), is_in_submap.cend())); - // --- Step 2 ---: Create a map from the indices in `recv_indices` (i.e. - // indices owned by this process that are in `indices` on other processes) to - // their owner in the submap. This is required since not all indices in - // `recv_indices` will necessarily be in `indices` on this process, and thus - // other processes must own them in the submap. - + // --- Step 2 ---: Create a map from the indices in `recv_indices` + // (i.e. indices owned by this process that are in `indices` on other + // processes) to their owner in the submap. This is required since not + // all indices in `recv_indices` will necessarily be in `indices` on + // this process, and thus other processes must own them in the submap. + // If ownership of received index doesn't change, then this process + // has the receiving rank as a destination std::vector recv_owners(send_disp.back()); + std::vector submap_dest; + submap_dest.reserve(1); const int rank = dolfinx::MPI::rank(imap.comm()); { - // Flag to track if the owner of any indices have changed in the submap + // Flag to track if the owner of any indices have changed in the + // submap bool owners_changed = false; // Create a map from (global) indices in `recv_indices` to a list of @@ -221,11 +225,14 @@ compute_submap_indices(const IndexMap& imap, assert(idx_local >= 0); assert(idx_local < local_range[1]); - // Check if index is included in the submap on this process. If so, - // this process remains its owner in the submap. Otherwise, + // Check if index is included in the submap on this process. If + // so, this process remains its owner in the submap. Otherwise, // add the process that sent it to the list of possible owners. if (is_in_submap[idx_local]) + { global_idx_to_possible_owner.push_back({idx, rank}); + submap_dest.push_back(dest[i]); + } else { owners_changed = true; @@ -239,18 +246,69 @@ compute_submap_indices(const IndexMap& imap, std::ranges::sort(global_idx_to_possible_owner); - // Choose the submap owner for each index in `recv_indices` + // Choose the submap owner for each index in `recv_indices` and pack + // destination ranks for each process that has received new indices. + // During ownership determination, we know what other processes + // requires this index, and add them to the destination set. 
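The comment above summarises the new ownership transfer: each index received from a ghosting process is looked up in the sorted (global index, candidate rank) list, the first candidate becomes its owner in the submap, and the remaining candidates become destination ranks of that new owner. A plain-Python restatement of the loop that follows (illustrative data only, not dolfinx API; the per-neighbour packing and the dest[i] check in the C++ are omitted for brevity):

    import bisect

    # Sorted (global index, candidate rank) pairs, analogous to
    # global_idx_to_possible_owner in the C++ code.
    possible_owners = [(5, 0), (5, 2), (9, 1), (9, 2), (9, 3)]
    recv_indices = [5, 9]

    owners = []          # submap owner chosen for each received index
    dest_ranks = set()   # ranks that still ghost an index whose owner changed
    for idx in recv_indices:
        # lower_bound: first pair whose global index is not less than idx
        lo = bisect.bisect_left(possible_owners, idx, key=lambda e: e[0])
        assert possible_owners[lo][0] == idx
        owners.append(possible_owners[lo][1])
        # upper_bound: one past the last pair with this global index; the
        # remaining candidates become destination ranks of the new owner
        hi = bisect.bisect_right(possible_owners, idx, key=lambda e: e[0])
        dest_ranks.update(rank for _, rank in possible_owners[lo + 1 : hi])

    print(owners)              # [0, 1]
    print(sorted(dest_ranks))  # [2, 3]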
std::vector send_owners; - send_owners.reserve(recv_indices.size()); - for (auto idx : recv_indices) + send_owners.reserve(1); + std::vector new_owner_dest_ranks; + new_owner_dest_ranks.reserve(1); + std::vector new_owner_dest_ranks_offsets(recv_sizes.size() + 1, 0); + std::vector new_owner_dest_ranks_sizes(recv_sizes.size()); + new_owner_dest_ranks_sizes.reserve(1); + for (std::size_t i = 0; i < recv_sizes.size(); ++i) { - // NOTE: Could choose new owner in a way that is is better for - // load balancing, though the impact is probably only very small - auto it = std::ranges::lower_bound(global_idx_to_possible_owner, idx, - std::ranges::less(), - [](auto e) { return e.first; }); - assert(it != global_idx_to_possible_owner.end() and it->first == idx); - send_owners.push_back(it->second); + for (int j = recv_disp[i]; j < recv_disp[i + 1]; ++j) + { + std::int64_t idx = recv_indices[j]; + + // NOTE: Could choose new owner in a way that is is better for + // load balancing, though the impact is probably only very small + auto it = std::ranges::lower_bound(global_idx_to_possible_owner, idx, + std::ranges::less(), + [](auto e) { return e.first; }); + assert(it != global_idx_to_possible_owner.end() and it->first == idx); + send_owners.push_back(it->second); + + // If rank that sent this ghost is the submap owner, send all + // other ranks + if (it->second == dest[i]) + { + // Find upper limit of recv index and pack all ranks from + // ownership determination (except new owner) as dest ranks + auto it_upper = std::ranges::upper_bound( + it, global_idx_to_possible_owner.end(), idx, std::ranges::less(), + [](auto e) { return e.first; }); + std::transform(std::next(it), it_upper, + std::back_inserter(new_owner_dest_ranks), + [](auto e) { return e.second; }); + } + } + + // Remove duplicate new dest ranks from recv process. The new + // owning process can have taken ownership of multiple indices + // from the same rank. 
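The comment just above explains why duplicates can appear: a process may take over ownership of several indices that were all ghosted by the same rank. The block below therefore sorts and uniquifies each neighbour's slice of new_owner_dest_ranks while building alltoallv-style sizes and offsets. A plain-Python equivalent of that bookkeeping (illustrative data only):

    # New dest ranks gathered per receiving neighbour, possibly with repeats
    ranks_per_neighbour = [[3, 1, 3], [], [2, 5, 2]]

    flat, sizes, offsets = [], [], [0]
    for ranks in ranks_per_neighbour:
        unique = sorted(set(ranks))  # sort + unique, as in the C++ below
        flat.extend(unique)
        sizes.append(len(unique))
        offsets.append(offsets[-1] + len(unique))

    print(flat)     # [1, 3, 2, 5]
    print(sizes)    # [2, 0, 2]
    print(offsets)  # [0, 2, 2, 4]

The flat array together with the sizes and offsets is exactly what the MPI_Neighbor_alltoallv call on the owner-to-ghost neighbourhood communicator consumes further down.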
+ if (auto dest_begin = std::next(new_owner_dest_ranks.begin(), + new_owner_dest_ranks_offsets[i]); + dest_begin != new_owner_dest_ranks.end()) + { + std::ranges::sort(dest_begin, new_owner_dest_ranks.end()); + auto [unique_end, range_end] + = std::ranges::unique(dest_begin, new_owner_dest_ranks.end()); + new_owner_dest_ranks.erase(unique_end, range_end); + + std::size_t num_unique_dest_ranks + = std::distance(dest_begin, unique_end); + new_owner_dest_ranks_sizes[i] = num_unique_dest_ranks; + new_owner_dest_ranks_offsets[i + 1] + = new_owner_dest_ranks_offsets[i] + num_unique_dest_ranks; + } + else + { + new_owner_dest_ranks_sizes[i] = 0; + new_owner_dest_ranks_offsets[i + 1] = new_owner_dest_ranks_offsets[i]; + } } // Create neighbourhood comm (owner -> ghost) @@ -267,6 +325,36 @@ compute_submap_indices(const IndexMap& imap, comm1); dolfinx::MPI::check_error(imap.comm(), ierr); + // Communicate number of received dest_ranks from each process + std::vector recv_dest_ranks_sizes(imap.src().size()); + recv_dest_ranks_sizes.reserve(1); + ierr = MPI_Neighbor_alltoall(new_owner_dest_ranks_sizes.data(), 1, MPI_INT, + recv_dest_ranks_sizes.data(), 1, MPI_INT, + comm1); + dolfinx::MPI::check_error(imap.comm(), ierr); + + // Communicate new dest ranks + std::vector recv_dest_rank_disp(imap.src().size() + 1, 0); + std::partial_sum(recv_dest_ranks_sizes.begin(), recv_dest_ranks_sizes.end(), + std::next(recv_dest_rank_disp.begin())); + std::vector recv_dest_ranks(recv_dest_rank_disp.back()); + recv_dest_ranks.reserve(1); + ierr = MPI_Neighbor_alltoallv( + new_owner_dest_ranks.data(), new_owner_dest_ranks_sizes.data(), + new_owner_dest_ranks_offsets.data(), MPI_INT, recv_dest_ranks.data(), + recv_dest_ranks_sizes.data(), recv_dest_rank_disp.data(), MPI_INT, + comm1); + dolfinx::MPI::check_error(imap.comm(), ierr); + + // Append new submap dest ranks and remove duplicates + std::ranges::copy(recv_dest_ranks, std::back_inserter(submap_dest)); + std::ranges::sort(submap_dest); + { + auto [unique_end, range_end] = std::ranges::unique(submap_dest); + submap_dest.erase(unique_end, range_end); + submap_dest.shrink_to_fit(); + } + // Free the communicator ierr = MPI_Comm_free(&comm1); dolfinx::MPI::check_error(imap.comm(), ierr); @@ -278,6 +366,7 @@ compute_submap_indices(const IndexMap& imap, // Local indices (w.r.t. original map) owned by this process in the // submap std::vector submap_owned; + submap_owned.reserve(indices.size()); // Local indices (w.r.t. 
original map) ghosted by this process in the // submap @@ -289,9 +378,9 @@ compute_submap_indices(const IndexMap& imap, { // Add owned indices to submap_owned - for (std::int32_t v : indices) - if (v < imap.size_local()) - submap_owned.push_back(v); + std::copy_if( + indices.begin(), indices.end(), std::back_inserter(submap_owned), + [local_size = imap.size_local()](auto i) { return i < local_size; }); // FIXME: Could just create when making send_indices std::vector send_indices_local(send_indices.size()); @@ -348,12 +437,6 @@ compute_submap_indices(const IndexMap& imap, submap_ghost = std::move(submap_ghost1); } - // Compute submap destination ranks - // FIXME: Remove call to NBX - std::vector submap_dest - = dolfinx::MPI::compute_graph_edges_nbx(imap.comm(), submap_src); - std::ranges::sort(submap_dest); - return {std::move(submap_owned), std::move(submap_ghost), std::move(submap_ghost_owners), std::move(submap_src), std::move(submap_dest)}; @@ -1078,8 +1161,8 @@ graph::AdjacencyList IndexMap::index_to_dest_ranks() const dolfinx::MPI::check_error(_comm.comm(), ierr); // Prepare displacement vectors - std::vector send_disp(dest.size() + 1, 0), - recv_disp(src.size() + 1, 0); + std::vector send_disp(dest.size() + 1, 0); + std::vector recv_disp(src.size() + 1, 0); std::partial_sum(send_sizes.begin(), send_sizes.end(), std::next(send_disp.begin())); std::partial_sum(recv_sizes.begin(), recv_sizes.end(), From d5f56d66dcc561ad017b0d3fde42e59f209f82f5 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 25 Sep 2024 21:22:17 +0100 Subject: [PATCH 3/7] Remove deprecated method for accessing PETSc Vec in Python (#3435) * Remove deprecated method * Remove leftover import --- python/dolfinx/fem/function.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/python/dolfinx/fem/function.py b/python/dolfinx/fem/function.py index 1c990fb663a..729294c3bbb 100644 --- a/python/dolfinx/fem/function.py +++ b/python/dolfinx/fem/function.py @@ -8,7 +8,6 @@ from __future__ import annotations import typing -import warnings from functools import singledispatch import numpy as np @@ -484,25 +483,6 @@ def x(self) -> la.Vector: """Vector holding the degrees-of-freedom.""" return self._x - @property - def vector(self): - """PETSc vector holding the degrees-of-freedom. - - Upon first call, this function creates a PETSc ``Vec`` object - that wraps the degree-of-freedom data. The ``Vec`` object is - cached and the cached ``Vec`` is returned upon subsequent calls. - - Note: - Prefer :func`x` where possible. - """ - warnings.warn( - "dolfinx.fem.Function.vector is deprecated.\n" - "Please use dolfinx.fem.Function.x.petsc_vec " - "to access the underlying petsc4py wrapper", - DeprecationWarning, - ) - return self.x.petsc_vec - @property def dtype(self) -> np.dtype: return np.dtype(self._cpp_object.x.array.dtype) From 828ae2c0d8b2d8b1bbe061a94a2ab543fad28566 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Thu, 26 Sep 2024 09:43:29 +0100 Subject: [PATCH 4/7] Add `codim_0_assembly` demo to CMake (#3436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add missing demo to CMake * Add missing CMake file * Make demo run in complex mode and parallel --------- Co-authored-by: Jørgen S. 
Dokken --- cpp/demo/CMakeLists.txt | 3 +- cpp/demo/codim_0_assembly/CMakeLists.txt | 67 +++++++++++++++++++++++ cpp/demo/codim_0_assembly/main.cpp | 6 +- cpp/demo/codim_0_assembly/mixed_codim0.py | 5 +- 4 files changed, 75 insertions(+), 6 deletions(-) create mode 100644 cpp/demo/codim_0_assembly/CMakeLists.txt diff --git a/cpp/demo/CMakeLists.txt b/cpp/demo/CMakeLists.txt index be4d0a305c0..25c5200e6eb 100644 --- a/cpp/demo/CMakeLists.txt +++ b/cpp/demo/CMakeLists.txt @@ -16,10 +16,11 @@ macro(add_demo_subdirectory subdir) endmacro(add_demo_subdirectory) # Add demos +add_demo_subdirectory(biharmonic) +add_demo_subdirectory(codim_0_assembly) add_demo_subdirectory(custom_kernel) add_demo_subdirectory(poisson) add_demo_subdirectory(poisson_matrix_free) add_demo_subdirectory(hyperelasticity) add_demo_subdirectory(interpolation-io) add_demo_subdirectory(interpolation_different_meshes) -add_demo_subdirectory(biharmonic) diff --git a/cpp/demo/codim_0_assembly/CMakeLists.txt b/cpp/demo/codim_0_assembly/CMakeLists.txt new file mode 100644 index 00000000000..70e9c2b8a7e --- /dev/null +++ b/cpp/demo/codim_0_assembly/CMakeLists.txt @@ -0,0 +1,67 @@ +# This file was generated by running +# +# python cmake/scripts/generate-cmakefiles from dolfinx/cpp +# +cmake_minimum_required(VERSION 3.19) + +set(PROJECT_NAME demo_codim_0_assembly) +project(${PROJECT_NAME} LANGUAGES C CXX) + +# Set C++20 standard +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +if(NOT TARGET dolfinx) + find_package(DOLFINX REQUIRED) +endif() + +include(CheckSymbolExists) +set(CMAKE_REQUIRED_INCLUDES ${PETSC_INCLUDE_DIRS}) +check_symbol_exists(PETSC_USE_COMPLEX petscsystypes.h PETSC_SCALAR_COMPLEX) +check_symbol_exists(PETSC_USE_REAL_DOUBLE petscsystypes.h PETSC_REAL_DOUBLE) + +# Add target to compile UFL files +if(PETSC_SCALAR_COMPLEX EQUAL 1) + if(PETSC_REAL_DOUBLE EQUAL 1) + set(SCALAR_TYPE "--scalar_type=complex128") + else() + set(SCALAR_TYPE "--scalar_type=complex64") + endif() +else() + if(PETSC_REAL_DOUBLE EQUAL 1) + set(SCALAR_TYPE "--scalar_type=float64") + else() + set(SCALAR_TYPE "--scalar_type=float32") + endif() +endif() +add_custom_command( + OUTPUT mixed_codim0.c + COMMAND ffcx ${CMAKE_CURRENT_SOURCE_DIR}/mixed_codim0.py ${SCALAR_TYPE} + VERBATIM + DEPENDS mixed_codim0.py + COMMENT "Compile mixed_codim0.py using FFCx" +) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +add_executable(${PROJECT_NAME} main.cpp ${CMAKE_CURRENT_BINARY_DIR}/mixed_codim0.c) +target_link_libraries(${PROJECT_NAME} dolfinx) + +# Do not throw error for 'multi-line comments' (these are typical in rst which +# includes LaTeX) +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-Wno-comment" HAVE_NO_MULTLINE) +set_source_files_properties( + main.cpp + PROPERTIES + COMPILE_FLAGS + "$<$:-Wno-comment -Wall -Wextra -pedantic -Werror>" +) + +# Test targets (used by DOLFINx testing system) +set(TEST_PARAMETERS2 -np 2 ${MPIEXEC_PARAMS} "./${PROJECT_NAME}") +set(TEST_PARAMETERS3 -np 3 ${MPIEXEC_PARAMS} "./${PROJECT_NAME}") +add_test(NAME ${PROJECT_NAME}_mpi_2 COMMAND "mpirun" ${TEST_PARAMETERS2}) +add_test(NAME ${PROJECT_NAME}_mpi_3 COMMAND "mpirun" ${TEST_PARAMETERS3}) +add_test(NAME ${PROJECT_NAME}_serial COMMAND ${PROJECT_NAME}) diff --git a/cpp/demo/codim_0_assembly/main.cpp b/cpp/demo/codim_0_assembly/main.cpp index c1797e60e66..29c91a21a92 100644 --- a/cpp/demo/codim_0_assembly/main.cpp +++ b/cpp/demo/codim_0_assembly/main.cpp @@ -66,7 +66,7 @@ int main(int argc, char* argv[]) + 
mesh->topology()->index_map(tdim)->num_ghosts(); std::vector cells(num_cells_local); std::iota(cells.begin(), cells.end(), 0); - std::vector values(cell_map->size_local(), 1); + std::vector values(num_cells_local, 1); std::for_each(marked_cells.begin(), marked_cells.end(), [&values](auto& c) { values[c] = 2; }); dolfinx::mesh::MeshTags cell_marker(mesh->topology(), tdim, @@ -123,7 +123,7 @@ int main(int argc, char* argv[]) la::SparsityPattern sp_mixed = fem::create_sparsity_pattern(*a_mixed); sp_mixed.finalize(); - la::MatrixCSR A_mixed(sp_mixed); + la::MatrixCSR A_mixed(sp_mixed); fem::assemble_matrix(A_mixed.mat_add_values(), *a_mixed, {}); A_mixed.scatter_rev(); @@ -132,7 +132,7 @@ int main(int argc, char* argv[]) la::SparsityPattern sp = fem::create_sparsity_pattern(*a); sp.finalize(); - la::MatrixCSR A(sp); + la::MatrixCSR A(sp); fem::assemble_matrix(A.mat_add_values(), *a, {}); A.scatter_rev(); diff --git a/cpp/demo/codim_0_assembly/mixed_codim0.py b/cpp/demo/codim_0_assembly/mixed_codim0.py index e69660b2bc5..24a73493dc8 100644 --- a/cpp/demo/codim_0_assembly/mixed_codim0.py +++ b/cpp/demo/codim_0_assembly/mixed_codim0.py @@ -7,6 +7,7 @@ TestFunction, TrialFunction, dx, + inner, ) cell = "quadrilateral" @@ -27,9 +28,9 @@ # of the parent mesh. The integration domain is the parent mesh, but we restrict integration # to all cells marked with subdomain_id=3, which will indicate what cells of our mesh is part # of the submesh -a_mixed = p * v * dx(domain=mesh, subdomain_id=3) +a_mixed = inner(p, v) * dx(domain=mesh, subdomain_id=3) q = TestFunction(W) -a = p * q * dx(domain=submesh) +a = inner(p, q) * dx(domain=submesh) forms = [a_mixed, a] From 1ce2766ec47f9466fcd09dcdc30724928755c89d Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 26 Sep 2024 10:09:30 +0100 Subject: [PATCH 5/7] Remove old demo (#3440) --- cpp/demo/mixed_topology/main.cpp | 151 ------------------------------- 1 file changed, 151 deletions(-) delete mode 100644 cpp/demo/mixed_topology/main.cpp diff --git a/cpp/demo/mixed_topology/main.cpp b/cpp/demo/mixed_topology/main.cpp deleted file mode 100644 index a5e55c48336..00000000000 --- a/cpp/demo/mixed_topology/main.cpp +++ /dev/null @@ -1,151 +0,0 @@ -// # Mixed topology -// -// Experimental demo. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace dolfinx; - -// Note: this demo is not currently intended to provide a fully -// functional example of using a mixed-topology mesh, but shows only the -// basic construction. Experimental. 
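A note on the mixed_codim0.py change further above: replacing p * v * dx with inner(p, v) * dx is what makes the forms usable in complex mode, because UFL's inner conjugates its second argument and so yields a sesquilinear form. A minimal standalone sketch of the same pattern (assuming basix.ufl for the element definition; it is not tied to the demo's meshes):

    import basix.ufl
    import ufl

    cell = "quadrilateral"
    coord_el = basix.ufl.element("Lagrange", cell, 1, shape=(2,))
    domain = ufl.Mesh(coord_el)
    V = ufl.FunctionSpace(domain, basix.ufl.element("Lagrange", cell, 1))

    u, v = ufl.TrialFunction(V), ufl.TestFunction(V)
    # inner(u, v) conjugates the test function, so the same form compiles
    # for real and complex scalar types; u * v * dx is not sesquilinear.
    a = ufl.inner(u, v) * ufl.dx(domain=domain)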
- -int main(int argc, char* argv[]) -{ - dolfinx::init_logging(argc, argv); - MPI_Init(&argc, &argv); - - // Number of square cell in x-direction - constexpr int nx_s = 2; - - // Number of triangle cells in x-direction - constexpr int nx_t = 2; - - // Number of cells in y-direction - constexpr int ny = 4; - - constexpr int num_s = nx_s * ny; - constexpr int num_t = 2 * nx_t * ny; - - std::vector x; - for (int i = 0; i < nx_s + nx_t + 1; ++i) - { - for (int j = 0; j < ny + 1; ++j) - { - x.push_back(i); - x.push_back(j); - } - } - - std::vector cells; - std::vector offsets{0}; - for (int i = 0; i < nx_s + nx_t; ++i) - { - for (int j = 0; j < ny; ++j) - { - const int v_0 = j + i * (ny + 1); - const int v_1 = v_0 + 1; - const int v_2 = v_0 + ny + 1; - const int v_3 = v_0 + ny + 2; - if (i < nx_s) - { - cells.push_back(v_0); - cells.push_back(v_1); - cells.push_back(v_3); - cells.push_back(v_2); - offsets.push_back(cells.size()); - } - else - { - cells.push_back(v_0); - cells.push_back(v_1); - cells.push_back(v_2); - offsets.push_back(cells.size()); - - cells.push_back(v_1); - cells.push_back(v_2); - cells.push_back(v_3); - offsets.push_back(cells.size()); - } - } - } - - graph::AdjacencyList cells_list(cells, offsets); - std::vector original_global_index(num_s + num_t); - std::iota(original_global_index.begin(), original_global_index.end(), 0); - std::vector ghost_owners; - std::vector cell_group_offsets{0, num_s, num_s + num_t, - num_s + num_t, num_s + num_t}; - std::vector boundary_vertices; - for (int j = 0; j < ny + 1; ++j) - { - boundary_vertices.push_back(j); - boundary_vertices.push_back(j + (nx_s + nx_t + 1) * ny); - } - - for (int i = 0; i < nx_s + nx_t + 1; ++i) - { - boundary_vertices.push_back((ny + 1) * i); - boundary_vertices.push_back(ny + (ny + 1) * i); - } - - std::ranges::sort(boundary_vertices); - auto [unique_end, range_end] = std::ranges::unique(boundary_vertices); - boundary_vertices.erase(unique_end, range_end); - - std::vector cell_types{mesh::CellType::quadrilateral, - mesh::CellType::triangle}; - std::vector> elements; - for (auto ct : cell_types) - elements.push_back(fem::CoordinateElement(ct, 1)); - - { - auto topo = std::make_shared(mesh::create_topology( - MPI_COMM_WORLD, cells_list, original_global_index, ghost_owners, - cell_types, cell_group_offsets, boundary_vertices)); - auto topo_cells = topo->connectivity(2, 0); - - std::cout << "cells\n------\n"; - for (int i = 0; i < topo_cells->num_nodes(); ++i) - { - std::cout << i << " ["; - for (auto q : topo_cells->links(i)) - std::cout << q << " "; - std::cout << "]" << std::endl;; - } - - topo->create_connectivity(1, 0); - - std::cout << "facets\n------\n"; - auto topo_facets = topo->connectivity(1, 0); - for (int i = 0; i < topo_facets->num_nodes(); ++i) - { - std::cout << i << " ["; - for (auto q : topo_facets->links(i)) - std::cout << q << " "; - std::cout << "]" << std::endl; - } - - mesh::Geometry geom = mesh::create_geometry(MPI_COMM_WORLD, *topo, elements, - cells_list, x, 2); - mesh::Mesh mesh(MPI_COMM_WORLD, topo, geom); - std::cout << "num cells = " << mesh.topology()->index_map(2)->size_local() - << std::endl; - for (auto q : mesh.topology()->entity_group_offsets(2)) - std::cout << q << " "; - std::cout << std::endl; - } - - MPI_Finalize(); - - return 0; -} From 493f77683471468a3fd13f2b4109682f2b585f7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Schartum=20Dokken?= Date: Thu, 26 Sep 2024 14:11:22 +0200 Subject: [PATCH 6/7] Check for interprocess facets when assembling interior facet integrals 
(#3439) * Add check for interprocess facet when creating integration domains for interior facets. * Update docstring of get_local_indexing * Small fixes * Improve docstring * Small fix * Update error message * Move assert --------- Co-authored-by: Garth N. Wells --- cpp/dolfinx/fem/utils.cpp | 28 +++++++++++++++++-- cpp/dolfinx/fem/utils.h | 22 +++++++++++++++ cpp/dolfinx/mesh/Topology.h | 14 +++++++--- cpp/dolfinx/mesh/topologycomputation.cpp | 6 ++-- cpp/dolfinx/mesh/topologycomputation.h | 6 ++-- python/test/unit/fem/test_assemble_domains.py | 7 +---- 6 files changed, 64 insertions(+), 19 deletions(-) diff --git a/cpp/dolfinx/fem/utils.cpp b/cpp/dolfinx/fem/utils.cpp index 91faaa86768..8f098aa686d 100644 --- a/cpp/dolfinx/fem/utils.cpp +++ b/cpp/dolfinx/fem/utils.cpp @@ -135,11 +135,12 @@ std::vector fem::compute_integration_domains( const int tdim = topology.dim(); if ((integral_type == IntegralType::cell ? tdim : tdim - 1) != dim) { - throw std::runtime_error("Invalid MeshTags dimension: " + throw std::runtime_error("Invalid mesh entity dimension: " + std::to_string(dim)); } { + // Create span of the owned entities (leaves off any ghosts) assert(topology.index_map(dim)); auto it1 = std::ranges::lower_bound(entities, topology.index_map(dim)->size_local()); @@ -150,15 +151,19 @@ std::vector fem::compute_integration_domains( switch (integral_type) { case IntegralType::cell: + { entity_data.insert(entity_data.begin(), entities.begin(), entities.end()); break; + } default: + { auto f_to_c = topology.connectivity(tdim - 1, tdim); if (!f_to_c) { throw std::runtime_error( "Topology facet-to-cell connectivity has not been computed."); } + auto c_to_f = topology.connectivity(tdim, tdim - 1); if (!c_to_f) { @@ -186,9 +191,18 @@ std::vector fem::compute_integration_domains( break; case IntegralType::interior_facet: { - for (std::size_t j = 0; j < entities.size(); ++j) + // Create indicator for interprocess facets + assert(topology.index_map(tdim - 1)); + const std::vector& interprocess_facets + = topology.interprocess_facets(); + std::vector interprocess_marker( + topology.index_map(tdim - 1)->size_local() + + topology.index_map(tdim - 1)->num_ghosts(), + 0); + std::ranges::for_each(interprocess_facets, [&interprocess_marker](auto f) + { interprocess_marker[f] = 1; }); + for (auto f : entities) { - const std::int32_t f = entities[j]; if (f_to_c->num_links(f) == 2) { // Get the facet as a pair of (cell, local facet) pairs, one @@ -197,6 +211,12 @@ std::vector fem::compute_integration_domains( = impl::get_cell_facet_pairs<2>(f, f_to_c->links(f), *c_to_f); entity_data.insert(entity_data.end(), facets.begin(), facets.end()); } + else if (interprocess_marker[f]) + { + throw std::runtime_error( + "Cannot compute interior facet integral over interprocess facet. " + "Use \"shared facet\" ghost mode when creating the mesh."); + } } } break; @@ -205,6 +225,8 @@ std::vector fem::compute_integration_domains( "Cannot compute integration domains. 
Integral type not supported."); } } + } + return entity_data; } //----------------------------------------------------------------------------- diff --git a/cpp/dolfinx/fem/utils.h b/cpp/dolfinx/fem/utils.h index 9ea72945d22..2cde72b617a 100644 --- a/cpp/dolfinx/fem/utils.h +++ b/cpp/dolfinx/fem/utils.h @@ -573,6 +573,21 @@ Form create_form_factory( num_integrals_type[interior_facet]); auto itg = integrals.insert({IntegralType::interior_facet, {}}); auto sd = subdomains.find(IntegralType::interior_facet); + + // Create indicator for interprocess facets + std::vector interprocess_marker; + if (num_integrals_type[interior_facet] > 0) + { + assert(topology->index_map(tdim - 1)); + const std::vector& interprocess_facets + = topology->interprocess_facets(); + std::int32_t num_facets = topology->index_map(tdim - 1)->size_local() + + topology->index_map(tdim - 1)->num_ghosts(); + interprocess_marker.resize(num_facets, 0); + std::ranges::for_each(interprocess_facets, [&interprocess_marker](auto f) + { interprocess_marker[f] = 1; }); + } + for (int i = 0; i < num_integrals_type[interior_facet]; ++i) { const int id = ids[i]; @@ -631,6 +646,13 @@ Form create_form_factory( default_facets_int.insert(default_facets_int.end(), pairs.begin(), pairs.end()); } + else if (interprocess_marker[f]) + { + throw std::runtime_error( + "Cannot compute interior facet integral over interprocess " + "facet. Please use ghost mode shared facet when creating the " + "mesh"); + } } itg.first->second.emplace_back(id, k, default_facets_int, active_coeffs); diff --git a/cpp/dolfinx/mesh/Topology.h b/cpp/dolfinx/mesh/Topology.h index a6a1adaa490..e3f5cfdb1a2 100644 --- a/cpp/dolfinx/mesh/Topology.h +++ b/cpp/dolfinx/mesh/Topology.h @@ -191,13 +191,19 @@ class Topology /// @brief Compute entity permutations and reflections. void create_entity_permutations(); - /// @brief List of inter-process facets, if facet topology has been - /// computed. + /// @brief List of inter-process facets. + /// + /// "Inter-process" facets are facets that are connected (1) to a cell + /// that is owned by the calling process (rank) and (2) to a cell that + /// is owned by another process. + /// + /// @pre Inter-process facets are available only if facet topology has + /// been computed. const std::vector& interprocess_facets() const; /// @brief List of inter-process facets, if facet topology has been - /// computed, for the facet type in `Topology::entity_types` identified by - /// index + /// computed, for the facet type in `Topology::entity_types` + /// identified by index. 
/// @param index Index of facet type const std::vector& interprocess_facets(std::int8_t index) const; diff --git a/cpp/dolfinx/mesh/topologycomputation.cpp b/cpp/dolfinx/mesh/topologycomputation.cpp index 4fd7c39aa99..edefdbd1e26 100644 --- a/cpp/dolfinx/mesh/topologycomputation.cpp +++ b/cpp/dolfinx/mesh/topologycomputation.cpp @@ -95,7 +95,7 @@ int get_ownership(const U& processes, const V& vertices) /// @param[in] num_entities_per_cell Number of entities per cell /// @param[in] entity_index Initial numbering for each row in /// entity_list -/// @returns Local indices and index map +/// @returns Local indices, the index map and shared entities std::tuple, common::IndexMap, std::vector> get_local_indexing(MPI_Comm comm, const common::IndexMap& vertex_map, std::span entity_list, @@ -804,13 +804,13 @@ mesh::compute_entities(MPI_Comm comm, const Topology& topology, int dim, cell_lists[i] = {cell_types[i], cells, cell_map}; } - auto [d0, d1, im, interprocess_facets] = compute_entities_by_key_matching( + auto [d0, d1, im, interprocess_entities] = compute_entities_by_key_matching( comm, cell_lists, *vertex_map, entity_type, dim); return {d0, std::make_shared>(std::move(d1)), std::make_shared(std::move(im)), - std::move(interprocess_facets)}; + std::move(interprocess_entities)}; } //----------------------------------------------------------------------------- std::array>, 2> diff --git a/cpp/dolfinx/mesh/topologycomputation.h b/cpp/dolfinx/mesh/topologycomputation.h index 1f1ffcc1e9d..a5e8a98f2f7 100644 --- a/cpp/dolfinx/mesh/topologycomputation.h +++ b/cpp/dolfinx/mesh/topologycomputation.h @@ -41,9 +41,9 @@ class Topology; /// `Topology::entity_types(dim)`. /// @return Tuple of (cell-entity connectivity, entity-vertex /// connectivity, index map, list of interprocess entities). -/// Interprocess entities lie on the "true" boundary between owned cells of each -/// process. If the entities already exist, then {nullptr, nullptr, nullptr, -/// std::vector()} is returned. +/// Interprocess entities lie on the "true" boundary between owned cells +/// of each process. If the entities already exists, then {nullptr, +/// nullptr, nullptr, std::vector()} is returned. 
std::tuple>>, std::shared_ptr>, std::shared_ptr, std::vector> diff --git a/python/test/unit/fem/test_assemble_domains.py b/python/test/unit/fem/test_assemble_domains.py index 26b5ee80eb8..d454203ca9c 100644 --- a/python/test/unit/fem/test_assemble_domains.py +++ b/python/test/unit/fem/test_assemble_domains.py @@ -49,12 +49,7 @@ def create_cell_meshtags_from_entities(mesh: Mesh, dim: int, cells: np.ndarray, reason="Unghosted interior facets fail in parallel", ), ), - pytest.param( - GhostMode.shared_facet, - marks=pytest.mark.skipif( - condition=MPI.COMM_WORLD.size == 1, reason="Shared ghost modes fail in serial" - ), - ), + GhostMode.shared_facet, ], ) From 38b58bf2c035a1c3b17ec8e492e57f853261758f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Schartum=20Dokken?= Date: Thu, 26 Sep 2024 15:19:19 +0200 Subject: [PATCH 7/7] Updating `demo_axis.py` and `demo_pml.py` for parallel computing (#3433) * Fix mesh constructor again * Ruff * Add correct ghost mode to demos using dS integrals * Apply suggestions from code review Revert refine change * Typing fixes --- python/demo/demo_axis.py | 40 ++++++++++++++++++++++++++++------------ python/demo/demo_pml.py | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/python/demo/demo_axis.py b/python/demo/demo_axis.py index f8115241f19..8ebb079d093 100644 --- a/python/demo/demo_axis.py +++ b/python/demo/demo_axis.py @@ -27,20 +27,14 @@ import dolfinx - if PETSc.IntType == np.int64 and MPI.COMM_WORLD.size > 1: - print("This solver fails with PETSc and 64-bit integers becaude of memory errors in MUMPS.") - # Note: when PETSc.IntType == np.int32, superlu_dist is used - # rather than MUMPS and does not trigger memory failures. - exit(0) - # The time-harmonic Maxwell equation is complex-valued PDE. PETSc # must therefore have compiled with complex scalars. 
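The test change above now exercises GhostMode.shared_facet in serial as well, and the demo updates below add the matching partitioner when reading the Gmsh model. A minimal standalone sketch of why that ghost mode matters for interior-facet (dS) integrals in parallel (assumes only a standard DOLFINx build, not the demo geometry):

    from mpi4py import MPI

    import ufl
    from dolfinx import fem
    from dolfinx.mesh import GhostMode, create_unit_square

    # "shared facet" ghosting is what the new error message asks for
    msh = create_unit_square(MPI.COMM_WORLD, 8, 8, ghost_mode=GhostMode.shared_facet)
    V = fem.functionspace(msh, ("Lagrange", 1))
    u = fem.Function(V)
    u.interpolate(lambda x: x[0])

    # An interior-facet integral; with GhostMode.none this raises the new
    # "shared facet" runtime error when run on more than one MPI rank
    J = fem.form(ufl.avg(u) * ufl.dS)
    value = msh.comm.allreduce(fem.assemble_scalar(J), op=MPI.SUM)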
- if not np.issubdtype(PETSc.ScalarType, np.complexfloating): + if not np.issubdtype(PETSc.ScalarType, np.complexfloating): # type: ignore print("Demo can only be executed when PETSc using complex scalars.") exit(0) - scalar_type = PETSc.ScalarType - real_type = PETSc.RealType + scalar_type = PETSc.ScalarType # type: ignore + real_type = PETSc.RealType # type: ignore if not dolfinx.has_petsc: print("This demo requires DOLFINx to be compiled with PETSc enabled.") @@ -463,7 +457,10 @@ def create_eps_mu(pml, rho, eps_bkg, mu_bkg): ) model = MPI.COMM_WORLD.bcast(model, root=0) -msh, cell_tags, facet_tags = io.gmshio.model_to_mesh(model, MPI.COMM_WORLD, 0, gdim=2) +partitioner = dolfinx.cpp.mesh.create_cell_partitioner(dolfinx.mesh.GhostMode.shared_facet) +msh, cell_tags, facet_tags = io.gmshio.model_to_mesh( + model, MPI.COMM_WORLD, 0, gdim=2, partitioner=partitioner +) gmsh.finalize() MPI.COMM_WORLD.barrier() @@ -651,9 +648,28 @@ def create_eps_mu(pml, rho, eps_bkg, mu_bkg): + k0**2 * ufl.inner(eps_pml * Es_m, v_m) * rho * dPml ) a, L = ufl.lhs(F), ufl.rhs(F) - - problem = LinearProblem(a, L, bcs=[], petsc_options={"ksp_type": "preonly", "pc_type": "lu"}) + sys = PETSc.Sys() # type: ignore + if sys.hasExternalPackage("superlu_dist"): # type: ignore + mat_factor_backend = "superlu_dist" + elif sys.hasExternalPackage("mumps"): # type: ignore + mat_factor_backend = "mumps" + else: + if msh.comm > 1: + raise RuntimeError("This demo requires a parallel linear algebra backend.") + else: + mat_factor_backend = "petsc" + problem = LinearProblem( + a, + L, + bcs=[], + petsc_options={ + "ksp_type": "preonly", + "pc_type": "lu", + "pc_factor_mat_solver_type": mat_factor_backend, + }, + ) Esh_m = problem.solve() + assert problem.solver.getConvergedReason() > 0, "Solver did not converge!" # Scattered magnetic field Hsh_m = -1j * curl_axis(Esh_m, m, rho) / (Z0 * k0) diff --git a/python/demo/demo_pml.py b/python/demo/demo_pml.py index 7cf298df95d..e878e809ff3 100644 --- a/python/demo/demo_pml.py +++ b/python/demo/demo_pml.py @@ -64,13 +64,6 @@ from petsc4py import PETSc -if PETSc.IntType == np.int64 and MPI.COMM_WORLD.size > 1: - print("This solver fails with PETSc and 64-bit integers becaude of memory errors in MUMPS.") - # Note: when PETSc.IntType == np.int32, superlu_dist is used rather - # than MUMPS and does not trigger memory failures. - exit(0) -# - - # Since we want to solve time-harmonic Maxwell's equation, we require # that the demo is executed with DOLFINx (PETSc) complex mode. 
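Both demos now choose the LU factorisation backend at run time instead of relying on the PETSc default; the same selection logic reappears in the demo_pml.py hunks below. A sketch of that logic factored into a helper (the helper name is illustrative; note also that the demos guard with msh.comm > 1, which presumably is meant to be msh.comm.size > 1, so the sketch uses the communicator size):

    from mpi4py import MPI
    from petsc4py import PETSc

    def preferred_lu_backend(comm: MPI.Comm) -> str:
        """Pick a parallel-capable LU backend, mirroring the demo logic."""
        sys = PETSc.Sys()
        if sys.hasExternalPackage("superlu_dist"):
            return "superlu_dist"
        if sys.hasExternalPackage("mumps"):
            return "mumps"
        if comm.size > 1:
            raise RuntimeError("A parallel LU backend (superlu_dist or MUMPS) is required.")
        return "petsc"

    petsc_options = {
        "ksp_type": "preonly",
        "pc_type": "lu",
        "pc_factor_mat_solver_type": preferred_lu_backend(MPI.COMM_WORLD),
    }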
@@ -121,7 +114,6 @@ def generate_mesh_wire( scatt_tag: int, pml_tag: int, ): - gmsh.model.add("nanowire") dim = 2 # A dummy circle for setting a finer mesh c1 = gmsh.model.occ.addCircle(0.0, 0.0, 0.0, radius_wire * 0.8, angle1=0.0, angle2=2 * np.pi) @@ -208,7 +200,7 @@ def generate_mesh_wire( gmsh.model.addPhysicalGroup(dim, y_group, tag=pml_tag + 2) # Marker interior surface in bkg group - boundaries: list[np.tynp.ping.NDArray[np.int32]] = [] + boundaries: list[np.typing.NDArray[np.int32]] = [] for tag in bkg_group: boundary_pairs = gmsh.model.get_boundary([(dim, tag)], oriented=False) boundaries.append(np.asarray([pair[1] for pair in boundary_pairs], dtype=np.int32)) @@ -457,7 +449,11 @@ def pml_coordinates(x: ufl.indexed.Indexed, alpha: float, k0: complex, l_dom: fl pml_tag, ) model = MPI.COMM_WORLD.bcast(model, root=0) -msh, cell_tags, facet_tags = gmshio.model_to_mesh(model, MPI.COMM_WORLD, 0, gdim=2) +partitioner = dolfinx.cpp.mesh.create_cell_partitioner(dolfinx.mesh.GhostMode.shared_facet) + +msh, cell_tags, facet_tags = gmshio.model_to_mesh( + model, MPI.COMM_WORLD, 0, gdim=2, partitioner=partitioner +) gmsh.finalize() MPI.COMM_WORLD.barrier() @@ -699,8 +695,30 @@ def create_eps_mu( a, L = ufl.lhs(F), ufl.rhs(F) -problem = LinearProblem(a, L, bcs=[], petsc_options={"ksp_type": "preonly", "pc_type": "lu"}) +# For factorisation prefer superlu_dist, then MUMPS, then default +sys = PETSc.Sys() # type: ignore +if sys.hasExternalPackage("superlu_dist"): # type: ignore + mat_factor_backend = "superlu_dist" +elif sys.hasExternalPackage("mumps"): # type: ignore + mat_factor_backend = "mumps" +else: + if msh.comm > 1: + raise RuntimeError("This demo requires a parallel linear algebra backend.") + else: + mat_factor_backend = "petsc" + +problem = LinearProblem( + a, + L, + bcs=[], + petsc_options={ + "ksp_type": "preonly", + "pc_type": "lu", + "pc_factor_mat_solver_type": mat_factor_backend, + }, +) Esh = problem.solve() +assert problem.solver.getConvergedReason() > 0, "Solver did not converge!" # - # Let's now save the solution in a `bp`-file. In order to do so, we need
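A closing note on the added convergence checks: both demos now assert that problem.solver.getConvergedReason() is positive after the solve. A sketch of a slightly more informative variant (the helper name is illustrative, not part of DOLFINx):

    from petsc4py import PETSc

    def check_solve(solver: PETSc.KSP) -> None:
        """Raise with the PETSc reason code if the linear solve did not converge."""
        reason = solver.getConvergedReason()
        if reason <= 0:
            # Non-positive reason codes mean the KSP diverged or has not converged
            raise RuntimeError(f"Linear solve failed, KSP converged reason: {reason}")

This would be called as check_solve(problem.solver) immediately after problem.solve().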