Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IWYU to CI #17078

Draft
wants to merge 19 commits into
base: branch-24.12
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
569 changes: 288 additions & 281 deletions .github/workflows/pr.yaml

Large diffs are not rendered by default.

14 changes: 1 addition & 13 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,11 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
build_type: pull-request
# Use the wheel container so we can skip conda solves and since our
# primary static consumers (Spark) are not in conda anyway.
container_image: "rapidsai/ci-wheel:latest"
run_script: "ci/configure_cpp_static.sh"
clang-tidy:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
run_script: "ci/clang_tidy.sh"
conda-python-cudf-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down
24 changes: 22 additions & 2 deletions ci/clang_tidy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

set -euo pipefail

rapids-logger "Create clang-tidy conda environment"
rapids-logger "Create checks conda environment"
. /opt/conda/etc/profile.d/conda.sh

ENV_YAML_DIR="$(mktemp -d)"
Expand All @@ -24,6 +24,26 @@ RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

source rapids-configure-sccache

# TODO: For testing purposes, clone and build IWYU. We can switch to a release
# once a clang 19-compatible version is available, which should be soon
# (https://github.com/include-what-you-use/include-what-you-use/issues/1641).
git clone https://github.com/include-what-you-use/include-what-you-use.git
pushd include-what-you-use
# IWYU's CMake build uses some Python scripts that assume that the cwd is
# importable, so support that legacy behavior.
export PYTHONPATH=${PWD}:${PYTHONPATH:-}
cmake -S . -B build -GNinja --install-prefix=${CONDA_PREFIX}
cmake --build build
cmake --install build
popd

# Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled.
cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja
cmake --build cpp/build
cmake --build cpp/build > build_output.txt 2>&1

# Parse the build output to extract only IWYU's proposed changes.
python cpp/scrips/parse_iwyu_output.py build_output.txt iwyu_output.txt
vyasr marked this conversation as resolved.
Show resolved Hide resolved

# Save the IWYU output as an artifact.
mkdir -p ${RAPIDS_ARTIFACTS_DIR}
mv iwyu_output.txt ${RAPIDS_ARTIFACTS_DIR}/iwyu_output.txt
2 changes: 1 addition & 1 deletion cpp/.clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Checks:
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: '*'
WarningsAsErrors: ''
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
Expand Down
11 changes: 9 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,13 @@ if(CUDF_CLANG_TIDY)
"clang-tidy version ${expected_clang_tidy_version} is required, but found ${LLVM_VERSION}"
)
endif()

find_program(IWYU_EXE NAMES include-what-you-use iwyu REQUIRED)
endif()

# Turn on the clang-tidy property for a target excluding the files specified in SKIPPED_FILES.
function(enable_clang_tidy target)
set(_tidy_options)
set(_tidy_options ENABLE_IWYU)
set(_tidy_one_value)
set(_tidy_multi_value SKIPPED_FILES)
cmake_parse_arguments(
Expand All @@ -191,6 +193,9 @@ function(enable_clang_tidy target)
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
)
if(_TIDY_ENABLE_IWYU)
set_target_properties(${target} PROPERTIES CXX_INCLUDE_WHAT_YOU_USE "${IWYU_EXE}")
endif()
foreach(file IN LISTS _TIDY_SKIPPED_FILES)
set_source_files_properties(${file} PROPERTIES SKIP_LINTING ON)
endforeach()
Expand Down Expand Up @@ -767,7 +772,9 @@ target_compile_options(
cudf PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
"$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)
enable_clang_tidy(cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp)
enable_clang_tidy(
cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp ENABLE_IWYU
)

if(CUDF_BUILD_STACKTRACE_DEBUG)
# Remove any optimization level to avoid nvcc warning "incompatible redefinition for option
Expand Down
157 changes: 157 additions & 0 deletions cpp/scripts/parse_iwyu_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Helper script to modify IWYU output to only include removals."""

import argparse
import re

def parse_log(log_content: str) -> tuple[dict[str, list[str]], dict[str, list[str]], dict[str, list[str]]]:
"""Parse the log content to extract the include lists."""
add_includes = {}
remove_includes = {}
full_include_lists = {}

# Regex to match "should add" and "should remove" sections
add_pattern = re.compile(r'(.+)\s+should add these lines:\n((?:.+\n)+)')
remove_pattern = re.compile(r'(.+)\s+should remove these lines:\n((?:.+\n)+)')
full_include_pattern = re.compile(r'The full include-list for (.+):\n((?:.+\n)+?)---')

# Parse "should add these lines"
for match in add_pattern.finditer(log_content):
file_path, includes = match.groups()
add_includes[file_path.strip()] = [line.strip() for line in includes.splitlines()]

# Parse "should remove these lines"
for match in remove_pattern.finditer(log_content):
file_path, includes = match.groups()
remove_includes[file_path.strip()] = [line.strip() for line in includes.splitlines()]

# Parse "full include-list"
for match in full_include_pattern.finditer(log_content):
file_path, includes = match.groups()
full_include_lists[file_path.strip()] = [line.strip() for line in includes.splitlines()]

return add_includes, remove_includes, full_include_lists


def extract_include_file(include_line):
"""Extract the core file path from an #include directive."""
match = re.search(r'#include\s+[<"]([^">]+)[">]', include_line)
if match:
return match.group(1)
return None


def process_includes(add_includes, remove_includes):
"""Process the include lists to remove any add/remove duplicates."""
# Make a copy of the dictionary keys to safely iterate over
add_keys = list(add_includes.keys())

for file_path in add_keys:
adds = add_includes[file_path]
add_files = {extract_include_file(line) for line in adds}

if file_path in remove_includes:
remove_files = {extract_include_file(line) for line in remove_includes[file_path]}

# Update remove_includes by filtering out matched files
remove_includes[file_path] = [
line for line in remove_includes[file_path]
if extract_include_file(line) not in add_files
]

# Also remove matching entries from add_includes
add_includes[file_path] = [
line for line in adds
if extract_include_file(line) not in remove_files
]


def update_full_include_list(add_includes, full_include_lists):
"""Update the full include-list to remove any includes that are in add_includes."""
# Update the full include-list to remove any includes that are in add_includes based on file name
for file_path, adds in add_includes.items():
add_files = {extract_include_file(line) for line in adds}
if file_path in full_include_lists:
full_include_lists[file_path] = [
line for line in full_include_lists[file_path]
if extract_include_file(line) not in add_files
]


def write_output(file_path, add_includes, remove_includes, full_include_lists):
"""Write the output back in the desired format."""
with open(file_path, 'w') as f:
for file in sorted(set(add_includes.keys()).union(remove_includes.keys()).union(full_include_lists.keys())):
# Write "should add these lines"
if file in add_includes and add_includes[file]:
f.write(f"{file} should add these lines:\n")
for line in add_includes[file]:
f.write(f"{line}\n")
f.write("\n")

# Write "should remove these lines"
if file in remove_includes and remove_includes[file]:
f.write(f"{file} should remove these lines:\n")
for line in remove_includes[file]:
f.write(f"{line}\n") # No extra minus sign
f.write("\n")

# Write "The full include-list"
if file in full_include_lists and full_include_lists[file]:
f.write(f"The full include-list for {file}:\n")
for line in full_include_lists[file]:
f.write(f"{line}\n")
f.write("---\n")


def modify_log(log_content, output_file="output.txt"):
"""Modify the log content to only include removals."""
# Step 1: Parse the log
add_includes, remove_includes, full_include_lists = parse_log(log_content)

# Step 2: Process the includes
process_includes(add_includes, remove_includes)

# Step 3: Update the full include-list
update_full_include_list(add_includes, full_include_lists)

# Step 4: Write the output back in the desired format
write_output(output_file, add_includes, remove_includes, full_include_lists)



def main():
parser = argparse.ArgumentParser(
description="Modify IWYU output to only include removals."
)
parser.add_argument("input", help="File containing IWYU output")

# Add output file parameter
parser.add_argument(
"output",
nargs="?",
help="Output file to write the modified output to",
default="iwyu_output.txt",
)
args = parser.parse_args()
with open(args.input, "r") as f:
log_content = f.read()
modify_log(log_content, args.output)


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ function(ConfigureTest CMAKE_TEST_NAME)
"GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_${_CUDF_TEST_STREAM_MODE}>"
)
endif()
enable_clang_tidy(${CMAKE_TEST_NAME})
enable_clang_tidy(${CMAKE_TEST_NAME} ENABLE_IWYU)
endfunction()

# ##################################################################################################
Expand Down
5 changes: 5 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,11 @@ dependencies:
packages:
- clang==19.1.0
- clang-tools==19.1.0
# TODO: These are build requirements for IWYU and can be replaced
# with IWYU itself once CI is ready.
- clangdev==19.1.0
- llvm==19.1.0
- llvmdev==19.1.0
docs:
common:
- output_types: [conda]
Expand Down
Loading