diff --git a/tensorflow_datasets/scripts/cli/convert_format_utils.py b/tensorflow_datasets/scripts/cli/convert_format_utils.py
index 8c03e00500f..d5afad23fdc 100644
--- a/tensorflow_datasets/scripts/cli/convert_format_utils.py
+++ b/tensorflow_datasets/scripts/cli/convert_format_utils.py
@@ -280,6 +280,49 @@ def _remove_incomplete_files(path: epath.Path) -> None:
   logging.info('Removed %d incomplete files.', num_incomplete_files)
 
 
+def _get_info_for_dirs_to_convert(
+    from_dir: epath.Path,
+    to_dir: epath.Path,
+    out_file_format: file_adapters.FileFormat,
+    overwrite: bool,
+) -> dataset_info.DatasetInfo | None:
+  """Returns the dataset info, or None if conversion should be skipped."""
+  builder = read_only_builder_lib.builder_from_directory(from_dir)
+  if out_file_format == builder.info.file_format:
+    raise ValueError(
+        f'The file format of the dataset ({builder.info.file_format}) is the'
+        f' same as the specified out file format! ({out_file_format})'
+    )
+  if out_file_format in builder.info.alternative_file_formats:
+    if overwrite:
+      logging.warning(
+          'The file format to convert to (%s) is already an alternative file'
+          ' format. Overwriting the shards!',
+          out_file_format.value,
+      )
+      return builder.info
+    elif os.fspath(from_dir) == os.fspath(to_dir):
+      logging.info(
+          'The file format to convert to (%s) is already an alternative file'
+          ' format of the dataset in %s. Skipping conversion.',
+          out_file_format.value,
+          os.fspath(from_dir),
+      )
+      # TODO(weide) add check whether data files are actually present.
+      return None
+    else:
+      logging.warning(
+          'The file format to convert to (%s) is already an alternative file'
+          ' format, but the converted output is being written to a different'
+          ' folder, so the shards will be converted anyway. From: %s, to: %s',
+          out_file_format.value,
+          os.fspath(from_dir),
+          os.fspath(to_dir),
+      )
+      return builder.info
+  return builder.info
+
+
 def _convert_dataset_dirs(
     from_to_dirs: Mapping[epath.Path, epath.Path],
     out_file_format: file_adapters.FileFormat,
@@ -303,36 +345,16 @@ def _convert_dataset_dirs(
   logging.info('Converting %d datasets.', len(from_to_dirs))
 
   found_dataset_versions: dict[epath.Path, dataset_info.DatasetInfo] = {}
+  # TODO(weide) parallelize this, because it's slow for dirs with many datasets.
   for from_dir, to_dir in from_to_dirs.items():
-    builder = read_only_builder_lib.builder_from_directory(from_dir)
-    if out_file_format == builder.info.file_format:
-      raise ValueError(
-          f'The file format of the dataset ({builder.info.file_format}) is the'
-          f' same as the specified out file format! ({out_file_format})'
-      )
-    if out_file_format in builder.info.alternative_file_formats:
-      if overwrite:
-        logging.warning(
-            'The file format to convert to (%s) is already an alternative file'
-            ' format. Overwriting the shards!',
-            out_file_format.value,
-        )
-      elif from_dir == to_dir:
-        logging.info(
-            'The file format to convert to (%s) is already an alternative file'
-            ' format of the dataset in %s. Skipping conversion.',
-            os.fspath(from_dir),
-            out_file_format.value,
-        )
-        continue
-      else:
-        logging.warning(
-            'The file format to convert to (%s) is already an alternative file'
-            ' format, but the converted output is being written to a different'
-            ' folder, so the shards will be converted anyway.',
-            out_file_format.value,
-        )
-    found_dataset_versions[from_dir] = builder.info
+    info = _get_info_for_dirs_to_convert(
+        from_dir=from_dir,
+        to_dir=to_dir,
+        out_file_format=out_file_format,
+        overwrite=overwrite,
+    )
+    if info is not None:
+      found_dataset_versions[from_dir] = info
 
   convert_dataset_fn = functools.partial(
       _convert_dataset,
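
For reference, the refactored helper encodes a small decision table: requesting the dataset's primary format is an error; a format already present as an alternative is overwritten in place, skipped, or re-converted depending on overwrite and on whether source and destination directories coincide; any other format is simply converted. A minimal, self-contained Python sketch of that table follows; should_convert and its boolean parameters are hypothetical stand-ins for the real builder and path checks, not part of the diff:

# Hypothetical stand-in mirroring the branches of _get_info_for_dirs_to_convert.
# Returns True when shards should be (re)written and False when conversion is
# skipped; raises for the invalid same-format case.
def should_convert(
    same_format: bool,          # out_file_format == builder.info.file_format
    already_alternative: bool,  # out_file_format in alternative_file_formats
    overwrite: bool,
    same_dir: bool,             # os.fspath(from_dir) == os.fspath(to_dir)
) -> bool:
  if same_format:
    raise ValueError('Output format equals the dataset file format.')
  if already_alternative:
    if overwrite:
      return True   # overwrite the existing alternative shards
    elif same_dir:
      return False  # shards are already in place; skip conversion
    else:
      return True   # different output folder; convert anyway
  return True        # format not present yet; convert (the fallthrough return)


assert should_convert(False, True, overwrite=False, same_dir=True) is False
assert should_convert(False, True, overwrite=True, same_dir=True) is True
assert should_convert(False, False, overwrite=False, same_dir=False) is True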