Skip to content

Commit

Permalink
Merge pull request #125 from xhochy/arrow-parameters
Browse files Browse the repository at this point in the history
Add support for Apache Arrow parameters
  • Loading branch information
MathMagique authored Sep 29, 2017
2 parents 46e9c87 + 9325e9c commit c6dd8e2
Show file tree
Hide file tree
Showing 19 changed files with 672 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ before_install: |
fi
install:
- pip install numpy==1.10.4 pyarrow==0.6.0 six twine pytest-cov coveralls
- pip install numpy==1.10.4 pyarrow==0.7.0 six twine pytest-cov coveralls pandas

before_script: |
if [ "$TRAVIS_OS_NAME" == "linux" ]; then
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@ Version history / changelog

From version 2.0.0, turbodbc adopts semantic versioning.

Version 2.4.0
-------------

* Support Apache Arrow ``pyarrow.Table`` objects as the input for
``executemanycolumns()``. In addition to direct Arrow support, this
should also help with more graceful handling of Pandas DataFrames
as ``pa.Table.from_pandas(…)`` handles additional corner cases of
Pandas data structures.

Version 2.3.0
-------------

Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,4 @@ after_test:

artifacts:
- path: 'turbodbc*.whl'
name: Binary wheel
name: Binary wheel
37 changes: 23 additions & 14 deletions cmake_scripts/FindArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,16 @@ if (NOT ARROW_HOME)
$ENV{VIRTUAL_ENV}/lib/*/site-packages/pyarrow/include)
get_filename_component(ARROW_SEARCH_LIB_PATH ${ARROW_INCLUDE_DIR} DIRECTORY)
else()
find_path(ARROW_INCLUDE_DIR arrow/api.h HINTS
/usr/local/lib/*/dist-packages/pyarrow/include)
# Locate the Arrow headers shipped inside an installed pyarrow package.
# Non-MSVC builds probe the dist-packages tree below /usr/local/lib; MSVC
# builds look inside the Python installation named by the PYTHON
# environment variable.
if (NOT MSVC)
  find_path(ARROW_INCLUDE_DIR
    arrow/api.h
    HINTS /usr/local/lib/*/dist-packages/pyarrow/include)
else()
  find_path(ARROW_INCLUDE_DIR
    arrow/api.h
    HINTS $ENV{PYTHON}/lib/site-packages/pyarrow/include)
endif()
get_filename_component(ARROW_SEARCH_LIB_PATH ${ARROW_INCLUDE_DIR} DIRECTORY)
set(ARROW_SEARCH_HEADER_PATHS ${ARROW_INCLUDE_DIR})
message(STATUS "Found candidate Arrow location: ${ARROW_SEARCH_LIB_PATH}")
endif()
else()
set(ARROW_SEARCH_HEADER_PATHS
Expand All @@ -68,17 +75,17 @@ else()
)
endif()

# With MSVC, make the find_library() calls below match files ending in
# .lib or .dll.
if (MSVC)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib;.dll")
endif()

find_library(ARROW_LIB_PATH NAMES arrow
PATHS
${ARROW_SEARCH_LIB_PATH}
NO_DEFAULT_PATH)
message(STATUS "Found ${ARROW_LIB_PATH} in ${ARROW_SEARCH_LIB_PATH}")
get_filename_component(ARROW_LIBS ${ARROW_LIB_PATH} DIRECTORY)

find_library(ARROW_JEMALLOC_LIB_PATH NAMES arrow_jemalloc
PATHS
${ARROW_SEARCH_LIB_PATH}
NO_DEFAULT_PATH)

find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python
PATHS
${ARROW_SEARCH_LIB_PATH}
Expand All @@ -87,18 +94,21 @@ find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python
if (ARROW_INCLUDE_DIR AND ARROW_LIBS)
set(ARROW_FOUND TRUE)

set(ARROW_LIB_NAME arrow)
set(ARROW_PYTHON_LIB_NAME arrow_python)

if (MSVC)
set(ARROW_STATIC_LIB ${ARROW_LIB_PATH})
set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH})
set(ARROW_SHARED_LIB ${ARROW_STATIC_LIB})
set(ARROW_PYTHON_SHARED_LIB ${ARROW_PYTHON_STATIC_LIB})
# MSVC: derive the static, shared (DLL) and import library paths from
# ARROW_LIBS, the directory in which the Arrow core library was found.
# The arrow_python static library is resolved against ARROW_LIBS as well,
# matching the shared and import library lines below. ARROW_PYTHON_LIBS is
# not set anywhere in the visible part of this module, so using it would
# produce a path rooted at "/".
# NOTE(review): confirm no other script defines ARROW_PYTHON_LIBS.
set(ARROW_STATIC_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
set(ARROW_PYTHON_STATIC_LIB ${ARROW_LIBS}/${ARROW_PYTHON_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
set(ARROW_SHARED_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
set(ARROW_SHARED_IMP_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}.lib)
set(ARROW_PYTHON_SHARED_IMP_LIB ${ARROW_LIBS}/${ARROW_PYTHON_LIB_NAME}.lib)
else()
# Static archives are resolved against ARROW_LIBS, the directory that holds
# the Arrow core library. ARROW_PYTHON_LIB_PATH is the find_library() result
# for arrow_python, i.e. the library file itself, so it cannot be used as a
# directory prefix.
set(ARROW_STATIC_LIB ${ARROW_LIBS}/libarrow.a)
set(ARROW_PYTHON_STATIC_LIB ${ARROW_LIBS}/libarrow_python.a)
set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow_jemalloc.a)

set(ARROW_SHARED_LIB ${ARROW_LIBS}/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX})
set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_LIBS}/libarrow_jemalloc${CMAKE_SHARED_LIBRARY_SUFFIX})
set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/libarrow_python${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()
Expand All @@ -107,7 +117,6 @@ if (ARROW_FOUND)
if (NOT Arrow_FIND_QUIETLY)
message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}")
message(STATUS "Found the Arrow Python library: ${ARROW_PYTHON_LIB_PATH}")
message(STATUS "Found the Arrow jemalloc library: ${ARROW_JEMALLOC_LIB_PATH}")
endif ()
else ()
if (NOT Arrow_FIND_QUIETLY)
Expand Down
2 changes: 1 addition & 1 deletion contrib/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ RUN apt-get install -y mysql-server-5.6 mysql-client-core-5.6 mysql-client-5.6 l
RUN apt-get install -y postgresql odbc-postgresql=1:09.02.0100-2ubuntu1

RUN pip install -U pip setuptools setuptools_scm
RUN pip install -U numpy==1.10.4 pyarrow==0.6.0 pybind11==2.2.0 pytest pytest-cov mock six
RUN pip install -U numpy==1.10.4 pyarrow==0.7.0 pybind11==2.2.0 pytest pytest-cov mock six pandas
7 changes: 4 additions & 3 deletions contrib/docker/build_and_test_in_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ pushd /io
export ODBCSYSINI=`pwd`/travis/odbc
export TURBODBC_TEST_CONFIGURATION_FILES="query_fixtures_postgresql.json,query_fixtures_mysql.json"

mkdir -p build_docker && cd build_docker
cmake -DCMAKE_INSTALL_PREFIX=./dist -DPYBIND11_PYTHON_VERSION=2.7 ..
make -j5

/etc/init.d/mysql start
mysql -u root -e 'CREATE DATABASE test_db;'

/etc/init.d/postgresql start
sudo -u postgres psql -U postgres -c 'CREATE DATABASE test_db;'
sudo -u postgres psql -U postgres -c "ALTER USER postgres WITH PASSWORD 'password';"

mkdir -p build_docker && cd build_docker
cmake -DCMAKE_INSTALL_PREFIX=./dist -DPYBIND11_PYTHON_VERSION=2.7 ..
make -j5
ctest --verbose
2 changes: 1 addition & 1 deletion contrib/docker/build_image.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#/bin/bash
#!/bin/bash

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
pushd ${SCRIPT_DIR}
Expand Down
16 changes: 13 additions & 3 deletions cpp/turbodbc_arrow/Library/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,20 @@ target_link_libraries(turbodbc_arrow_support
PUBLIC ${Odbc_LIBRARIES}
PUBLIC cpp_odbc
PUBLIC turbodbc
PUBLIC arrow
PUBLIC arrow_python
)

# Link turbodbc_arrow_support against Arrow and its Python bindings.
# Non-MSVC builds link the shared libraries directly; MSVC builds link the
# corresponding import libraries instead.
if (NOT MSVC)
  target_link_libraries(turbodbc_arrow_support
    PUBLIC ${ARROW_SHARED_LIB}
    PUBLIC ${ARROW_PYTHON_SHARED_LIB})
else()
  target_link_libraries(turbodbc_arrow_support
    PUBLIC ${ARROW_SHARED_IMP_LIB}
    PUBLIC ${ARROW_PYTHON_SHARED_IMP_LIB})
endif()

add_custom_command(TARGET turbodbc_arrow_support POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:turbodbc_arrow_support> ${CMAKE_BINARY_DIR})
Expand All @@ -30,7 +40,7 @@ endif()

install(
TARGETS turbodbc_arrow_support
LIBRARY DESTINATION lib
DESTINATION lib
)

install(
Expand Down
7 changes: 6 additions & 1 deletion cpp/turbodbc_arrow/Library/src/arrow_result_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

// Somewhere a macro defines BOOL as a constant. This is in conflict with array/type.h
#undef BOOL
#undef timezone
#include <arrow/api.h>
#include <arrow/python/pyarrow.h>

#include <sql.h>

#include <turbodbc/errors.h>
#include <turbodbc/time_helpers.h>

#include <ciso646>
#include <vector>

using arrow::default_memory_pool;
Expand Down Expand Up @@ -228,7 +231,9 @@ Status arrow_result_set::fetch_all_native(std::shared_ptr<arrow::Table>* out)
pybind11::object arrow_result_set::fetch_all()
{
std::shared_ptr<arrow::Table> table;
fetch_all_native(&table);
if (not fetch_all_native(&table).ok()) {
throw turbodbc::interface_error("Fetching Arrow result set failed.");
}

arrow::py::import_pyarrow();
return pybind11::reinterpret_borrow<pybind11::object>(pybind11::handle(arrow::py::wrap_table(table)));
Expand Down
11 changes: 9 additions & 2 deletions cpp/turbodbc_arrow/Library/src/python_bindings.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include <turbodbc_arrow/arrow_result_set.h>
#include <turbodbc_arrow/set_arrow_parameters.h>
#include <turbodbc/cursor.h>

#include <pybind11/pybind11.h>

Expand All @@ -12,6 +14,11 @@ arrow_result_set make_arrow_result_set(std::shared_ptr<turbodbc::result_sets::re
return arrow_result_set(*result_set_pointer, strings_as_dictionary);
}

// Wrapper exposed to Python as "set_arrow_parameters": forwards the parameter
// set of the cursor's current command, together with the given Python object,
// to turbodbc_arrow::set_arrow_parameters.
// NOTE(review): dereferences cursor.get_command() without a null check;
// presumably a command has always been prepared before this is called.
// TODO confirm against callers.
void set_arrow_parameters(turbodbc::cursor & cursor, pybind11::object const & pyarrow_table)
{
turbodbc_arrow::set_arrow_parameters(cursor.get_command()->get_parameters(), pyarrow_table);
}

}

PYBIND11_MODULE(turbodbc_arrow_support, module)
Expand All @@ -21,6 +28,6 @@ PYBIND11_MODULE(turbodbc_arrow_support, module)
pybind11::class_<arrow_result_set>(module, "ArrowResultSet")
.def("fetch_all", &arrow_result_set::fetch_all);

module.def("make_arrow_result_set", make_arrow_result_set,
pybind11::arg("result_set"), pybind11::arg("strings_as_dictionary"));
module.def("make_arrow_result_set", make_arrow_result_set);
module.def("set_arrow_parameters", set_arrow_parameters);
}
Loading

0 comments on commit c6dd8e2

Please sign in to comment.