diff --git a/geometry/render/test/meshes/rainbow_stripes.png b/geometry/render/test/meshes/rainbow_stripes.png
index 446369e105d9..53ef53ae6684 100644
Binary files a/geometry/render/test/meshes/rainbow_stripes.png and b/geometry/render/test/meshes/rainbow_stripes.png differ
diff --git a/geometry/render_vtk/BUILD.bazel b/geometry/render_vtk/BUILD.bazel
index 600a9d7731dc..b8348857351c 100644
--- a/geometry/render_vtk/BUILD.bazel
+++ b/geometry/render_vtk/BUILD.bazel
@@ -153,6 +153,7 @@ drake_cc_googletest(
         "//conditions:default": [],
     }),
     data = [
+        ":test/multi_material_mesh.png",
         ":test/whole_image_custom_color.png",
         ":test/whole_image_custom_depth.png",
         ":test/whole_image_custom_label.png",
diff --git a/geometry/render_vtk/test/internal_render_engine_vtk_test.cc b/geometry/render_vtk/test/internal_render_engine_vtk_test.cc
index 03b120b9a146..26bda8a2f70d 100644
--- a/geometry/render_vtk/test/internal_render_engine_vtk_test.cc
+++ b/geometry/render_vtk/test/internal_render_engine_vtk_test.cc
@@ -29,6 +29,7 @@
 #include "drake/geometry/read_gltf_to_memory.h"
 #include "drake/geometry/shape_specification.h"
 #include "drake/math/rigid_transform.h"
+#include "drake/math/roll_pitch_yaw.h"
 #include "drake/math/rotation_matrix.h"
 #include "drake/systems/sensors/image.h"
 #include "drake/systems/sensors/image_io.h"
@@ -92,6 +93,7 @@ using Eigen::Vector2d;
 using Eigen::Vector3d;
 using Eigen::Vector4d;
 using math::RigidTransformd;
+using math::RollPitchYawd;
 using math::RotationMatrixd;
 using render::ColorRenderCamera;
 using render::DepthRange;
@@ -276,6 +278,85 @@ void SaveTestOutputImage(const Image& image, const std::string& filename) {
   }
 }
 
+// Given the position of the camera (C) the position of an aiming target T
+// (and a direction point "up"), computes a pose for the camera X_WC such that
+// the camera is located at C, looking towards T, with the top of the camera
+// pointing as much as possible in the "up" direction.
+// @pre |C - T| > 0.
+// @pre |(C - T).dot(U)| < 1. (I.e., up doesn't point to or away from the
+// target).
+// TODO(SeanCurtis-TRI): This should be a utility somewhere related to the
+// render cameras. Possibly just a free function.
+RigidTransformd PoseCamera(const Vector3d& p_WC, const Vector3d& p_WT,
+                           const Vector3d& v_WU = Vector3d::UnitZ()) {
+  // The camera looks in the Cz direction with -Cy pointing up in the image
+  // and Cx right.
+  const Vector3d p_CT_W = p_WT - p_WC;
+  const Vector3d v_WCz = p_CT_W.normalized();
+  DRAKE_DEMAND(std::abs(v_WCz.dot(v_WU)) < 0.999999);
+  // Remember -Cy points in the U direction.
+  const Vector3d v_WCx = -v_WU.cross(v_WCz).normalized();
+  const Vector3d v_WCy = v_WCz.cross(v_WCx).normalized();
+  return RigidTransformd(
+      RotationMatrixd::MakeFromOrthonormalColumns(v_WCx, v_WCy, v_WCz), p_WC);
+}
+
+// Compares the test image against a reference image.
+//
+// The bytes of `test_image` are compared with the decoded bytes of the image
+// located at `ref_filename` to within the given tolerance.
+template <typename ImageType>
+void CompareImages(const ImageType& test_image, const std::string& ref_filename,
+                   double tolerance) {
+  // The type we save to disk and compare the bytes of; may not be the same as
+  // the input image type.
+  using CompareType =
+      std::conditional_t<std::is_same_v<ImageType, ImageDepth32F>, ImageRgba8U,
+                         ImageType>;
+  CompareType compare_image;
+  if constexpr (std::is_same_v<ImageType, ImageDepth32F>) {
+    // Normalize the depth image so that the resulting image is interpretable
+    // by humans.
+    ColorizeDepthImage<double> colorizer;
+    colorizer.Calc(test_image, &compare_image);
+  } else {
+    compare_image = test_image;
+  }
+
+  const fs::path ref_path = FindResourceOrThrow(ref_filename);
+  CompareType expected_image;
+  ASSERT_TRUE(systems::sensors::LoadImage(ref_path, &expected_image));
+
+  // Confirming image equivalence is tricky. We want to be sensitive to changes
+  // in code that might produce different images but, at the same time, write a
+  // test that will survive the vagaries of rendering in CI. To that end, we
+  // employ two thresholds:
+  //
+  //   - tolerance: allowed per-channel deviation (passed as parameter).
+  //   - conformity: the fraction of channel values that must deviate less than
+  //                 `tolerance` (hard-coded below).
+  //
+  // Tolerance is provided by each test, but the conformity is hard-coded to be
+  // a high value (99.5%). Essentially, each of the primitives added by
+  // AddShapeRows() fills about 0.5% of the final image. If we want to recognize
+  // when any of those shapes changes in some significant way, we need to fail
+  // if that number of pixels deviates.
+  using T = typename CompareType::T;
+  ASSERT_EQ(expected_image.size(), compare_image.size());
+  Eigen::Map<const VectorX<T>> data_expected(expected_image.at(0, 0),
+                                             expected_image.size());
+  Eigen::Map<const VectorX<T>> data_actual(compare_image.at(0, 0),
+                                           compare_image.size());
+  const Eigen::ArrayXd differences = (data_expected.template cast<double>() -
+                                      data_actual.template cast<double>())
+                                         .array()
+                                         .abs();
+  const int num_acceptable = (differences <= tolerance).count();
+  const double kConformity = 0.995;
+  EXPECT_GE(num_acceptable / static_cast<float>(expected_image.size()),
+            kConformity);
+}
+
 // Sanitizes a test case description into a legal filename.
 std::string SafeFilename(const std::string& description) {
   std::string filename{description};
@@ -884,8 +965,7 @@ TEST_F(RenderEngineVtkTest, GltfUnsupportedExtensionRequired) {
 // Primitives result in a geometry with a single Part. However, we can load
 // meshes from .gltf or .obj files that will create multiple parts. The meshes
 // in this test are conceptually identical: a cube with different colors on each
-// face. We'll render the cube six times with different orientations to expose
-// each colored face to the camera, and confirm the observed color.
+// face. They should render nearly identically as well.
 //
 // The glTF file has been structured to further test various glTF features,
 // including:
@@ -903,108 +983,65 @@ TEST_F(RenderEngineVtkTest, GltfUnsupportedExtensionRequired) {
 //  7. Single meshes with multiple materials.
 //
 // If all of that is processed correctly, we should get a cube with a different
-// color on each face. We'll test for those colors.
+// color on each face.
 //
 // The obj features under test are a subset of the glTF features.
 TEST_F(RenderEngineVtkTest, MultiMaterialObjects) {
-  if (FLAGS_backend == "EGL") {
-    log()->warn("Skipping MultiMaterialObjects test on EGL (expected-fail)");
-    return;
-  }
-
-  // The name of the face we expect presented to the camera, and the rotation
-  // required to put it in front of the camera. We'll use the name to look up
-  // the expected color.
-  struct Face {
-    std::string name;
-    RotationMatrixd rotation;
-  };
-
-  const std::vector<std::string> filenames{
-      FindResourceOrThrow("drake/geometry/render/test/meshes/rainbow_box.gltf"),
-      FindResourceOrThrow("drake/geometry/render/test/meshes/rainbow_box.obj")};
-
-  // The expected *illuminated* material color, keyed first by mesh extension
-  // and then by face name.
-  //
-  // For the glTF, the material/texture colors are not exactly reproduced
-  // because the lighting model associated with the PBR shader. For now, we
-  // account for this by putting the observed color in the test. If we change
-  // the glTF (or lighting model), we'll need to update these values
-  // accordingly.
-  //
-  // For the obj, it should be a reproduction of the diffuse color in the
-  // .mtl file (where there is no texture) or the product of texture color
-  // and Kd value. The red, green, and blue faces all share a common textured
-  // material with the Kd value of (0.8, 0.8, 0.8). In the map below, the first
-  // Rgba color represents the texture value, the second, the Kd value.
-  const std::map<std::string, std::map<std::string, Rgba>> rendered_color{
-      {".obj",
-       {{"green", Rgba(0.016, 0.945, 0.129) * Rgba(0.8, 0.8, 0.8)},
-        {"orange", Rgba(0.8, 0.359, 0.023)},
-        {"red", Rgba(0.945, 0.016, 0.016) * Rgba(0.8, 0.8, 0.8)},
-        {"blue", Rgba(0.098, 0.016, 0.945) * Rgba(0.8, 0.8, 0.8)},
-        {"yellow", Rgba(0.799, 0.8, 0)},
-        {"purple", Rgba(0.436, 0, 0.8)}}},
-      {".gltf",
-       {{"green", Rgba(0.078, 0.553, 0.110)},
-        {"orange", Rgba(0.529, 0.259, 0.125)},
-        {"red", Rgba(0.553, 0.078, 0.078)},
-        {"blue", Rgba(0.098, 0.078, 0.553)},
-        {"yellow", Rgba(0.529, 0.529, 0.075)},
-        {"purple", Rgba(0.310, 0.075, 0.529)}}}};
-
-  const std::vector<Face> faces{
-      {.name = "green", .rotation = RotationMatrixd()},
-      {.name = "orange", .rotation = RotationMatrixd::MakeXRotation(M_PI / 2)},
-      {.name = "red", .rotation = RotationMatrixd::MakeXRotation(M_PI)},
-      {.name = "blue", .rotation = RotationMatrixd::MakeXRotation(-M_PI / 2)},
-      {.name = "yellow", .rotation = RotationMatrixd::MakeYRotation(-M_PI / 2)},
-      {.name = "purple", .rotation = RotationMatrixd::MakeYRotation(M_PI / 2)},
-  };
-
-  for (const auto& filename : filenames) {
-    Init(X_WC_, true);
-    Mesh mesh(filename);
-    // When we add a glTF file, the terrain's material color gets promoted to
-    // PBR (to match). Therefore, the expected outlier color needs to shift
-    // to account for the material change.
-    expected_outlier_color_ = mesh.extension() == ".gltf"
-                                  ? TestColor(Rgba(0.4392, 0.4392, 0.4745))
-                                  : kTerrainColor;
-    expected_label_ = RenderLabel(3);
-    // Note: Passing diffuse color or texture to a glTF spawns a warning.
-    PerceptionProperties material;
-    material.AddProperty("label", "id", expected_label_);
-    const GeometryId id = GeometryId::get_new_id();
-    renderer_->RegisterVisual(id, mesh, material, RigidTransformd::Identity(),
-                              true /* needs update */);
+  const std::vector<fs::path> filenames{
+      fs::path(FindResourceOrThrow(
+          "drake/geometry/render/test/meshes/rainbow_box.gltf")),
+      fs::path(FindResourceOrThrow(
+          "drake/geometry/render/test/meshes/rainbow_box.obj"))};
+
+  // The camera has a smaller image (640 x 480), a narrower field of view (so
+  // we get a more orthographic view into the geometry) and is posed so that
+  // we can see both cubes.
+  const RigidTransformd X_WR = PoseCamera(Vector3d(8, -8, 8), Vector3d::Zero());
+  const int w = 480;
+  const int h = 360;
+  const CameraInfo& source_intrinsics = depth_camera_.core().intrinsics();
+  const CameraInfo intrinsics(w, h, source_intrinsics.fov_y() / 2);
+  const DepthRenderCamera camera(
+      {"unused", intrinsics, depth_camera_.core().clipping(),
+       depth_camera_.core().sensor_pose_in_camera_body()},
+      depth_camera_.depth_range());
 
-    // Render from the original to make sure it's complete and correct.
-    for (const auto& face : faces) {
-      expected_color_ = rendered_color.at(mesh.extension()).at(face.name);
+  // We'll load the same mesh twice, placed symmetrically around the origin.
+  const GeometryId id1 = GeometryId::get_new_id();
+  const RigidTransformd X_WM1(Vector3d(-2, 0, 0));
+  const GeometryId id2 = GeometryId::get_new_id();
+  // Rotate the second mesh so, from the single view, we can see all six sides
+  // of the mesh cube.
+  const RigidTransformd X_WM2(RollPitchYawd(0, M_PI_2, M_PI),
+                              Vector3d(2, 0, 0));
 
-      renderer_->UpdatePoses(unordered_map<GeometryId, RigidTransformd>{
-          {id, RigidTransformd(face.rotation)}});
-      PerformCenterShapeTest(__LINE__, renderer_.get(),
-                             fmt::format("{} test on {} face - original",
-                                         mesh.extension(), face.name)
-                                 .c_str());
-    }
+  PerceptionProperties material;
+  material.AddProperty("label", "id", RenderLabel(17));
+  for (const fs::path& f : filenames) {
+    Init(X_WR, false);
+    Mesh mesh(f);
+    renderer_->RegisterVisual(id1, mesh, material, X_WM1,
+                              false /* needs update */);
+    renderer_->RegisterVisual(id2, mesh, material, X_WM2,
+                              false /* needs update */);
 
-    // Repeat that from a clone to confirm that the artifacts survived cloning.
-    std::unique_ptr<RenderEngine> clone = renderer_->Clone();
-    RenderEngineVtk* vtk_clone = dynamic_cast<RenderEngineVtk*>(clone.get());
-    for (const auto& face : faces) {
-      expected_color_ = rendered_color.at(mesh.extension()).at(face.name);
-
-      vtk_clone->UpdatePoses(unordered_map<GeometryId, RigidTransformd>{
-          {id, RigidTransformd(face.rotation)}});
-      PerformCenterShapeTest(
-          __LINE__, vtk_clone,
-          fmt::format("{} test on {} face - clone", mesh.extension(), face.name)
-              .c_str());
-    }
+    const std::string name =
+        fmt::format("image for {}", f.extension().string());
+    ImageRgba8U color(w, h);
+    ImageDepth32F depth(w, h);
+    ImageLabel16I label(w, h);
+    this->Render(__LINE__, name, renderer_.get(), &camera, &color, &depth,
+                 &label);
+
+    SCOPED_TRACE(name);
+    // The image was made from the glTF file. glTF files get rendered using
+    // PBR. OBJs get rendered using Phong materials. It leads to slightly
+    // different levels of illumination, requiring a bit more tolerance (about
+    // 10%). When looking at the images, they should be qualitatively similar.
+    const double tolerance = f.extension() == ".gltf" ? 2 : 25;
+    CompareImages(color,
+                  "drake/geometry/render_vtk/test/multi_material_mesh.png",
+                  tolerance);
   }
 }
 
@@ -2552,78 +2589,6 @@ double AddShapeRows(RenderEngineVtk* render_engine,
   return x;
 }
 
-// Compares the test image against a reference image.
-//
-// The test image will be written to the test outputs (if defined) with the same
-// name as the reference file, but with "_test" appended to the filename. The
-// bytes of the image are compared to within the given tolerance.
-//
-// We want the referenced images to be meaningful to a human as well. So, we
-// adopt the following strategy:
-//
-//   1. color images: No extra action taken.
-//   2. label images: To view the 16-bit image in an 8-bit world, we use label
-//                    values that truncate nicely (see AddShapeRows()).
-//   3. depth images: To view the 32-bit depth image in an 8-bit world, we
-//                    color the depth via ColorizeDepthImage. We are not saving
-//                    the literal depth values to disk. By implication, it means
-//                    we're also only *testing* the color-encoded depth as well.
-template <typename ImageType>
-void CompareImages(const ImageType& test_image, const std::string& ref_filename,
-                   double tolerance, std::string_view log_suffix = {}) {
-  // The type we save to disk and compare the bytes of; may not be the same as
-  // the input image type.
-  using CompareType =
-      std::conditional_t<std::is_same_v<ImageType, ImageDepth32F>, ImageRgba8U,
-                         ImageType>;
-  CompareType compare_image;
-  if constexpr (std::is_same_v<ImageType, ImageDepth32F>) {
-    // Normalize the depth image so that the resulting image is interpretable
-    // by humans.
-    ColorizeDepthImage<double> colorizer;
-    colorizer.Calc(test_image, &compare_image);
-  } else {
-    compare_image = test_image;
-  }
-
-  const fs::path ref_path = FindResourceOrThrow(ref_filename);
-  SaveTestOutputImage(
-      compare_image,
-      fmt::format("{}{}_test.png", ref_path.stem().string(), log_suffix));
-
-  CompareType expected_image;
-  ASSERT_TRUE(systems::sensors::LoadImage(ref_path, &expected_image));
-
-  // Confirming image equivalence is tricky. We want to be sensitive to changes
-  // in code that might produce different images but, at the same time, write a
-  // test that will survive the vagaries of rendering in CI. To that end, we
-  // employ two thresholds:
-  //
-  //   - tolerance: allowed per-channel deviation (passed as parameter).
-  //   - conformity: the fraction of channel values that must deviate less than
-  //                 `tolerance` (hard-coded below).
-  //
-  // Tolerance is provided by each test, but the conformity is hard-coded to be
-  // a high value (99.5%). Essentially, each of the primitives added by
-  // AddShapeRows() fills about 0.5% of the final image. If we want to recognize
-  // when any of those shapes changes in some significant way, we need to fail
-  // if that number of pixels deviates.
-  using T = typename CompareType::T;
-  ASSERT_EQ(expected_image.size(), compare_image.size());
-  Eigen::Map<const VectorX<T>> data_expected(expected_image.at(0, 0),
-                                             expected_image.size());
-  Eigen::Map<const VectorX<T>> data_actual(compare_image.at(0, 0),
-                                           compare_image.size());
-  const Eigen::ArrayXd differences = (data_expected.template cast<double>() -
-                                      data_actual.template cast<double>())
-                                         .array()
-                                         .abs();
-  const int num_acceptable = (differences <= tolerance).count();
-  const double kConformity = 0.995;
-  EXPECT_GE(num_acceptable / static_cast<float>(expected_image.size()),
-            kConformity);
-}
-
 // Given lights measured and expressed in World, re-expresses them in Camera.
 vector<LightParameter> TransformLightsToCamera(
     const vector<LightParameter>& lights_W, const RigidTransformd& X_WC) {
@@ -2680,7 +2645,8 @@ TEST_F(RenderEngineVtkTest, WholeImageDefaultParams) {
   ImageRgba8U color(w, h);
   ImageDepth32F depth(w, h);
   ImageLabel16I label(w, h);
-  this->Render(0, std::nullopt, &engine, &camera, &color, &depth, &label);
+  this->Render(__LINE__, "whole_image_default", &engine, &camera, &color,
+               &depth, &label);
 
   {
     SCOPED_TRACE("Color image");
@@ -2778,26 +2744,27 @@ TEST_F(RenderEngineVtkTest, WholeImageCustomParams) {
     ImageRgba8U color(w, h);
     ImageDepth32F depth(w, h);
     ImageLabel16I label(w, h);
-    this->Render(0, std::nullopt, &engine, &camera, &color, &depth, &label);
+    this->Render(__LINE__,
+                 fmt::format("whole_image_custom_color_{}", lights[0].frame),
+                 &engine, &camera, &color, &depth, &label);
 
-    const std::string log_suffix = fmt::format("_{}", lights[0].frame);
     {
       SCOPED_TRACE(fmt::format("Color image - {}", lights[0].frame));
       CompareImages(
           color, "drake/geometry/render_vtk/test/whole_image_custom_color.png",
-          /* tolerance = */ 2, log_suffix);
+          /* tolerance = */ 2);
     }
     {
       SCOPED_TRACE("Depth image");
       CompareImages(
           depth, "drake/geometry/render_vtk/test/whole_image_custom_depth.png",
-          /* tolerance = */ 2, log_suffix);
+          /* tolerance = */ 2);
     }
     {
       SCOPED_TRACE("Label image");
       CompareImages(
           label, "drake/geometry/render_vtk/test/whole_image_custom_label.png",
-          /* tolerance = */ 0, log_suffix);
+          /* tolerance = */ 0);
     }
   }
 }
@@ -2886,8 +2853,8 @@ TEST_F(RenderEngineVtkTest, WholeImageVerticalAspectRatio) {
   ImageRgba8U tall_color(w, h);
   ImageDepth32F tall_depth(w, h);
   ImageLabel16I tall_label(w, h);
-  this->Render(0, std::nullopt, &engine, &camera, &tall_color, &tall_depth,
-               &tall_label);
+  this->Render(__LINE__, "whole_image_custom_color_vertical_ar", &engine,
+               &camera, &tall_color, &tall_depth, &tall_label);
 
   // Now clip down to reference size. We only need to clip the color image;
   // label and depth images don't get the render-as-square treatment because
@@ -2905,7 +2872,7 @@ TEST_F(RenderEngineVtkTest, WholeImageVerticalAspectRatio) {
 
   CompareImages(color,
                 "drake/geometry/render_vtk/test/whole_image_custom_color.png",
-                /* tolerance = */ 2, "_vertical_ar");
+                /* tolerance = */ 2);
 }
 
 }  // namespace
diff --git a/geometry/render_vtk/test/multi_material_mesh.png b/geometry/render_vtk/test/multi_material_mesh.png
new file mode 100644
index 000000000000..16a134fc77ff
Binary files /dev/null and b/geometry/render_vtk/test/multi_material_mesh.png differ