From e369b9276c2e27c356b5a6b8595fced4c415642b Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 13 Feb 2024 01:19:28 -0800 Subject: [PATCH] feat: create an exportable version of plan loading (#91) This PR adds an installable target of the plan loading routines (notably the text format) that is more readily consumed by other languages. --------- Co-authored-by: Weston Pace --- CMakeLists.txt | 2 + export/CMakeLists.txt | 3 + export/planloader/CMakeLists.txt | 22 +++++++ export/planloader/planloader.cpp | 61 +++++++++++++++++++ export/planloader/planloader.h | 46 ++++++++++++++ export/planloader/tests/CMakeLists.txt | 33 ++++++++++ export/planloader/tests/PlanLoaderTest.cpp | 42 +++++++++++++ src/substrait/common/Io.cpp | 2 + .../antlr4/cmake/ExternalAntlr4Cpp.cmake | 2 + 9 files changed, 213 insertions(+) create mode 100644 export/CMakeLists.txt create mode 100644 export/planloader/CMakeLists.txt create mode 100644 export/planloader/planloader.cpp create mode 100644 export/planloader/planloader.h create mode 100644 export/planloader/tests/CMakeLists.txt create mode 100644 export/planloader/tests/PlanLoaderTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 552c0897..b4644b23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) option(SUBSTRAIT_CPP_SANITIZE_DEBUG_BUILD "Turns on address and undefined memory sanitization runtime checking." @@ -55,3 +56,4 @@ if(${SUBSTRAIT_CPP_BUILD_TESTING}) endif() add_subdirectory(src/substrait) +add_subdirectory(export) diff --git a/export/CMakeLists.txt b/export/CMakeLists.txt new file mode 100644 index 00000000..46aa69df --- /dev/null +++ b/export/CMakeLists.txt @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: Apache-2.0 + +add_subdirectory(planloader) diff --git a/export/planloader/CMakeLists.txt b/export/planloader/CMakeLists.txt new file mode 100644 index 00000000..13eceabe --- /dev/null +++ b/export/planloader/CMakeLists.txt @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: Apache-2.0 + +if(NOT BUILD_SUBDIR_NAME EQUAL "release") + message( + SEND_ERROR, + "The planloader library does not work in Debug mode due to its dependencies." + ) +endif() + +add_library(planloader SHARED planloader.cpp) + +add_dependencies(planloader substrait_io) +target_link_libraries(planloader substrait_io) + +install( + TARGETS planloader + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + PRIVATE_HEADER DESTINATION ${CMAKE_INSTALL_INCDIR}) + +if(${SUBSTRAIT_CPP_BUILD_TESTING}) + add_subdirectory(tests) +endif() diff --git a/export/planloader/planloader.cpp b/export/planloader/planloader.cpp new file mode 100644 index 00000000..2a52cc80 --- /dev/null +++ b/export/planloader/planloader.cpp @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "planloader.h" + +#include +#include + +extern "C" { + +// Load a Substrait plan (in any format) from disk. +// Stores the Substrait plan in planBuffer in serialized form. +// Returns a SerializedPlan structure containing either the serialized plan or +// an error message. error_message is nullptr upon success. +SerializedPlan* load_substrait_plan(const char* filename) { + auto newPlan = new SerializedPlan(); + newPlan->buffer = nullptr; + newPlan->size = 0; + newPlan->error_message = nullptr; + + auto planOrError = io::substrait::loadPlan(filename); + if (!planOrError.ok()) { + auto errMsg = planOrError.status().message(); + newPlan->error_message = new char[errMsg.length()+1]; + strncpy(newPlan->error_message, errMsg.data(), errMsg.length()+1); + return newPlan; + } + ::substrait::proto::Plan plan = *planOrError; + std::string text = plan.SerializeAsString(); + newPlan->buffer = new unsigned char[text.length()+1]; + memcpy(newPlan->buffer, text.data(), text.length()+1); + newPlan->size = static_cast( + text.length() & + std::numeric_limits::max()); + return newPlan; +} + +void free_substrait_plan(SerializedPlan* plan) { + delete[] plan->buffer; + delete[] plan->error_message; + delete plan; +} + +// Write a serialized Substrait plan to disk in the specified format. +// On error returns a non-empty error message. +// On success a nullptr is returned. +const char* save_substrait_plan( + const unsigned char* plan_data, + int32_t plan_data_length, + const char* filename, + io::substrait::PlanFileFormat format) { + ::substrait::proto::Plan plan; + std::string data((const char*) plan_data, plan_data_length); + plan.ParseFromString(data); + auto result = io::substrait::savePlan(plan, filename, format); + if (!result.ok()) { + return result.message().data(); + } + return nullptr; +} + +} // extern "C" diff --git a/export/planloader/planloader.h b/export/planloader/planloader.h new file mode 100644 index 00000000..3116371a --- /dev/null +++ b/export/planloader/planloader.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include + +extern "C" { + +// Since this is actually C code, stick to C style names for exporting. +// NOLINTBEGIN(readability-identifier-naming) + +using SerializedPlan = struct { + // If set, contains a serialized ::substrait::proto::Plan object. + unsigned char *buffer; + // If buffer is set, this is the size of the buffer. + int32_t size; + // If null the buffer is valid, otherwise this points to a null terminated + // error string. + char *error_message; +}; + +// Load a Substrait plan (in any format) from disk. +// +// Accepts filename as a null-terminated C string. +// Returns a SerializedPlan structure containing either the serialized plan or +// an error message. This SerializedPlan should be freed using +// free_substrait_plan. +SerializedPlan* load_substrait_plan(const char* filename); + +// Frees a SerializedPlan that was returned from load_substrait_plan. +void free_substrait_plan(SerializedPlan* plan); + +// Write a serialized Substrait plan to disk in the specified format. +// +// plan_data is a Substrait Plan serialized into a byte array with length +// plan_data_length. +// Filename is a null-terminated C string. +// On error returns a non-empty error message. +// On success an empty string is returned. +const char* save_substrait_plan( + const unsigned char* plan_data, + int32_t plan_data_length, + const char* filename, + io::substrait::PlanFileFormat format); + +// NOLINTEND(readability-identifier-naming) + +} // extern "C" diff --git a/export/planloader/tests/CMakeLists.txt b/export/planloader/tests/CMakeLists.txt new file mode 100644 index 00000000..debc785b --- /dev/null +++ b/export/planloader/tests/CMakeLists.txt @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: Apache-2.0 + +cmake_path(GET CMAKE_CURRENT_BINARY_DIR PARENT_PATH + CMAKE_CURRENT_BINARY_PARENT_DIR) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_PARENT_DIR}) + +add_test_case( + planloader_test + SOURCES + PlanLoaderTest.cpp + EXTRA_LINK_LIBS + planloader + gmock + gtest + gtest_main) + +set(TEXTPLAN_SOURCE_DIR "${CMAKE_SOURCE_DIR}/src/substrait/textplan") + +add_custom_command( + TARGET planloader_test + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data.." + COMMAND ${CMAKE_COMMAND} -E make_directory + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" + COMMAND + ${CMAKE_COMMAND} -E copy + "${TEXTPLAN_SOURCE_DIR}/converter/data/q6_first_stage.json" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json") + +message( + STATUS "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" +) diff --git a/export/planloader/tests/PlanLoaderTest.cpp b/export/planloader/tests/PlanLoaderTest.cpp new file mode 100644 index 00000000..744ac1d2 --- /dev/null +++ b/export/planloader/tests/PlanLoaderTest.cpp @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include +#include +#include + +#include "../planloader.h" +#include "substrait/proto/plan.pb.h" + +namespace io::substrait::textplan { +namespace { + +TEST(PlanLoaderTest, LoadAndSave) { + auto serializedPlan = load_substrait_plan("data/q6_first_stage.json"); + ASSERT_EQ(serializedPlan->error_message, nullptr); + + ::substrait::proto::Plan plan; + bool parseStatus = + plan.ParseFromArray(serializedPlan->buffer, serializedPlan->size); + ASSERT_TRUE(parseStatus) << "Failed to parse the plan."; + + const char* saveStatus = save_substrait_plan( + serializedPlan->buffer, + serializedPlan->size, + "outfile.splan", + PlanFileFormat::kText); + ASSERT_EQ(saveStatus, nullptr); + + free_substrait_plan(serializedPlan); +} + +TEST(PlanLoaderTest, LoadMissingFile) { + auto serializedPlan = load_substrait_plan("no_such_file.json"); + ASSERT_THAT( + serializedPlan->error_message, + ::testing::StartsWith("Failed to open file no_such_file.json")); + + free_substrait_plan(serializedPlan); +} + +} // namespace +} // namespace io::substrait::textplan diff --git a/src/substrait/common/Io.cpp b/src/substrait/common/Io.cpp index af06066c..a90ff853 100644 --- a/src/substrait/common/Io.cpp +++ b/src/substrait/common/Io.cpp @@ -55,6 +55,8 @@ absl::StatusOr<::substrait::proto::Plan> loadPlan( case PlanFileFormat::kText: return textplan::loadFromText(*contentOrError); } + // There are no other possibilities so this can't happen. + return absl::UnimplementedError("Unexpected format encountered."); } absl::Status savePlan( diff --git a/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake b/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake index 0d142f30..63d7d943 100644 --- a/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake +++ b/third_party/antlr4/cmake/ExternalAntlr4Cpp.cmake @@ -97,6 +97,7 @@ if(ANTLR4_ZIP_REPOSITORY) -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} -DWITH_STATIC_CRT:BOOL=${ANTLR4_WITH_STATIC_CRT} -DDISABLE_WARNINGS:BOOL=ON + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON # -DCMAKE_CXX_STANDARD:STRING=17 # if desired, compile the runtime with a different C++ standard # -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} # alternatively, compile the runtime with the same C++ standard as the outer project INSTALL_COMMAND "" @@ -116,6 +117,7 @@ else() -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} -DWITH_STATIC_CRT:BOOL=${ANTLR4_WITH_STATIC_CRT} -DDISABLE_WARNINGS:BOOL=ON + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON # -DCMAKE_CXX_STANDARD:STRING=17 # if desired, compile the runtime with a different C++ standard # -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} # alternatively, compile the runtime with the same C++ standard as the outer project INSTALL_COMMAND ""