Skip to content

Commit

Permalink
Merge pull request #5 from yc-huang/mph
Browse files Browse the repository at this point in the history
split native library to two to make liburing optional
  • Loading branch information
yc-huang authored Jan 2, 2024
2 parents ebf4cc5 + fb3698d commit 2bb64f3
Show file tree
Hide file tree
Showing 15 changed files with 777 additions and 263 deletions.
24 changes: 21 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ plugins {

group 'tech.bsdb'
archivesBaseName = "bsdb-core"
version '0.1.2'
version '0.1.3'

sourceCompatibility = 1.9

Expand Down Expand Up @@ -73,11 +73,13 @@ dependencies {
test {
    testLogging.showStandardStreams = true
    // A system property holds a single value, so setting java.library.path
    // twice would keep only the last directory. Join both native-library
    // output directories into one search path instead.
    systemProperty "java.library.path", [
        file("${buildDir}/libs/bsdbjni/shared").absolutePath,
        file("${buildDir}/libs/bsdburingjni/shared").absolutePath
    ].join(File.pathSeparator)
}


processResources {
    // Copy the contents of both native-library output directories into the
    // processed resources.
    from "${buildDir}/libs/bsdbjni/shared/", "${buildDir}/libs/bsdburingjni/shared/"
}


Expand Down Expand Up @@ -108,7 +110,7 @@ model {
binaries {
all {
if (toolChain in Gcc) {
cCompiler.args "-O2"
cCompiler.args "-O3"
linker.args "-shared", "-fPIC", "-luring"
}
}
Expand Down Expand Up @@ -144,10 +146,22 @@ model {
c {
source {
lib library: 'bsdbjni', linkage: 'static'
lib library: 'jdk', linkage: 'api'
srcDir "src/main/c"
include "*.c"
}
}
}
}
bsdburingjni(NativeLibrarySpec) {
sources {
c {
source {
lib library: 'bsdburingjni', linkage: 'static'
lib library: 'liburing', linkage: 'api'
lib library: 'jdk', linkage: 'api'
srcDir "src/main/c"
include "**/*.c"
include "uring/*.c"
}
}
}
Expand Down Expand Up @@ -213,3 +227,7 @@ uploadArchives {
// Make build, test and processResources depend on both native
// shared-library tasks.
["bsdbjniSharedLibrary", "bsdburingjniSharedLibrary"].each { nativeLibTask ->
    build.dependsOn(nativeLibTask)
    test.dependsOn(nativeLibTask)
    processResources.dependsOn(nativeLibTask)
}
120 changes: 120 additions & 0 deletions src/main/c/mph.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Sux: Succinct data structures
*
* Copyright (C) 2018-2020 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

#include <errno.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "spooky.h"
#include "mph.h"

/*
 * True when an allocation made through calloc() failed. When USE_MMAP is
 * defined, mph.h replaces calloc() with mmap(), which reports failure as
 * MAP_FAILED rather than NULL.
 */
#ifdef USE_MMAP
#define MPH_ALLOC_FAILED(p) ((p) == MAP_FAILED)
#else
#define MPH_ALLOC_FAILED(p) ((p) == NULL)
#endif

/*
 * Give back a buffer obtained through calloc(). Under USE_MMAP the buffer is
 * a mapping and must be unmapped instead of freed; that release is best
 * effort (the result of munmap() is not checked).
 */
static void mph_release(void *buf, size_t bytes) {
#ifdef USE_MMAP
    if (buf != NULL && buf != MAP_FAILED) munmap(buf, bytes);
#else
    (void)bytes;
    free(buf);
#endif
}

/*
 * Read exactly len bytes from fd into buf.
 *
 * read() may legitimately return fewer bytes than requested, so keep reading
 * until the buffer is full. Returns 0 on success, -1 on an I/O error or if
 * the end of file is reached first.
 */
static int read_fully(int fd, void *buf, size_t len) {
    unsigned char *dst = buf;
    while (len > 0) {
        const ssize_t got = read(fd, dst, len);
        if (got < 0) {
            if (errno == EINTR) continue; /* interrupted before any data: retry */
            return -1;
        }
        if (got == 0) return -1; /* premature end of file */
        dst += got;
        len -= (size_t)got;
    }
    return 0;
}

/*
 * Allocate an array of count 64-bit words and fill it from fd.
 *
 * On success stores the array in *out (NULL when count is 0) and returns 0.
 * On failure stores NULL in *out, releases anything allocated and returns -1.
 */
static int load_words(int fd, uint64_t count, uint64_t **out) {
    *out = NULL;
    if (count == 0) return 0;
    /* Reject lengths whose byte size does not fit in size_t. */
    if (count > SIZE_MAX / sizeof **out) return -1;

    uint64_t *words = calloc(count, sizeof *words);
    if (MPH_ALLOC_FAILED(words)) return -1;

    const size_t bytes = (size_t)count * sizeof *words;
    if (read_fully(fd, words, bytes) != 0) {
        mph_release(words, bytes);
        return -1;
    }
    *out = words;
    return 0;
}

/*
 * Load a serialized mph structure from the file descriptor h.
 *
 * The stream is read sequentially as raw 64-bit words in host byte order:
 * size, multiplier, global seed, the length of edge_offset_and_seed followed
 * by that many words, then the length of array followed by that many words.
 *
 * Returns a newly allocated structure owned by the caller, or NULL if an
 * allocation fails, a read fails, or the stream ends early. Nothing is left
 * allocated on failure. The descriptor is not closed.
 */
mph *load_mph(int h) {
    mph *m = calloc(1, sizeof *m);
    if (MPH_ALLOC_FAILED(m)) return NULL;

    uint64_t multiplier;
    if (read_fully(h, &m->size, sizeof m->size) != 0) goto fail;
    if (read_fully(h, &multiplier, sizeof multiplier) != 0) goto fail;
    m->multiplier = multiplier;
    if (read_fully(h, &m->global_seed, sizeof m->global_seed) != 0) goto fail;

    if (read_fully(h, &m->edge_offset_and_seed_length, sizeof m->edge_offset_and_seed_length) != 0) goto fail;
    if (load_words(h, m->edge_offset_and_seed_length, &m->edge_offset_and_seed) != 0) goto fail;

    if (read_fully(h, &m->array_length, sizeof m->array_length) != 0) goto fail;
    if (load_words(h, m->array_length, &m->array) != 0) goto fail;

    return m;

fail:
    /* m->array is still NULL on every failure path; only the first array can be live. */
    mph_release(m->edge_offset_and_seed, (size_t)m->edge_offset_and_seed_length * sizeof *m->edge_offset_and_seed);
    mph_release(m, sizeof *m);
    return NULL;
}

/*
 * Count the 2-bit fields of x that are not 00.
 *
 * OR-ing x with x >> 1 puts, in the low bit of every pair, whether either
 * bit of that pair was set; the 0x55... mask keeps just those low bits.
 */
static inline int _count_nonzero_pairs(const uint64_t x) {
    return __builtin_popcountll((x | x >> 1) & UINT64_C(0x5555555555555555));
}

/*
 * Count the nonzero 2-bit entries with index in [start, end) of a packed
 * array holding 32 entries per 64-bit word (entry i lives in word i / 32 at
 * bit offset (i % 32) * 2).
 *
 * Word indices are kept in 64 bits: storing start / 32 in an int would
 * truncate for very large positions.
 */
static inline uint64_t count_nonzero_pairs(const uint64_t start, const uint64_t end, const uint64_t * const array) {
    uint64_t block = start / 32;
    const uint64_t end_block = end / 32;
    const unsigned start_shift = (unsigned)(start % 32) * 2;
    const unsigned end_shift = (unsigned)(end % 32) * 2;

    /* Range confined to one word: drop entries at and above end, then those below start. */
    if (block == end_block) {
        const uint64_t below_end = array[block] & ((UINT64_C(1) << end_shift) - 1);
        return _count_nonzero_pairs(below_end >> start_shift);
    }

    uint64_t pairs = 0;
    /* Partial first word. */
    if (start_shift != 0) pairs += _count_nonzero_pairs(array[block++] >> start_shift);
    /* Whole words in between. */
    while (block < end_block) pairs += _count_nonzero_pairs(array[block++]);
    /* Partial last word; skipped when end is word-aligned so array[end_block] is never read. */
    if (end_shift != 0) pairs += _count_nonzero_pairs(array[block] & ((UINT64_C(1) << end_shift) - 1));
    return pairs;
}

/*
 * Derive three variable indices from a key signature.
 *
 * The signature is rehashed with seed, and each of the first three hash
 * words is scaled into e[0..2]. Only the low `bits` bits of a hash word are
 * kept, where `bits` is the number of leading zeros of num_variables, so the
 * product with num_variables fits in 64 bits; shifting right by `bits` then
 * yields a value below num_variables.
 *
 * NOTE: __builtin_clzll is undefined for 0, so num_variables is assumed to
 * be positive.
 */
static inline void signature_to_equation(const uint64_t *signature, const uint64_t seed, int num_variables, int *e) {
    uint64_t rehashed[4];
    spooky_short_rehash(signature, seed, rehashed);

    const int bits = __builtin_clzll(num_variables);
    const uint64_t low_bits = (UINT64_C(1) << bits) - 1;

    for (int i = 0; i < 3; i++) {
        e[i] = ((rehashed[i] & low_bits) * num_variables) >> bits;
    }
}


/* Low 56 bits of an edge_offset_and_seed word; the top 8 bits are excluded. */
#define OFFSET_MASK (UINT64_C(-1) >> 8)
/* Scaling factor 1.10 in 8-bit fixed point: floor(1.10 * 256) = 281. */
#define C_TIMES_256 (int)(floor((1.09 + 0.01) * 256))

/*
 * Map the edge offset stored in the low 56 bits of edge_offset_seed to a
 * vertex offset by multiplying it by C_TIMES_256 / 256, rounding down.
 */
static inline uint64_t vertex_offset(const uint64_t edge_offset_seed) {
    const uint64_t edge_offset = edge_offset_seed & OFFSET_MASK;
    const uint64_t scaled = edge_offset * C_TIMES_256;
    return scaled >> 8;
}

/*
 * Return the 2-bit entry at index pos of a packed array holding 32 entries
 * per 64-bit word. The result is in the range 0..3.
 */
static inline int get_2bit_value(uint64_t *array, uint64_t pos) {
    const uint64_t bit_index = pos * 2;
    const uint64_t word = array[bit_index / 64];
    const unsigned shift = (unsigned)(bit_index % 64);
    return (int)((word >> shift) & 3);
}

/*
 * Shared lookup used by all mph_get_* entry points once the key has been
 * hashed into a 4-word signature.
 *
 * Steps:
 *  1. Pick a bucket from the top 63 bits of signature[0] scaled by
 *     mph->multiplier (high 64 bits of the 128-bit product).
 *  2. Read the bucket's edge_offset_and_seed word; entry bucket + 1 gives the
 *     end of the bucket's vertex range, so the table is read one past the
 *     selected bucket.
 *  3. Turn the signature into three vertex indices local to the bucket, using
 *     the bits of the word outside OFFSET_MASK as seed.
 *  4. Sum the three 2-bit values stored at those vertices; the sum modulo 3
 *     selects one of the three vertices.
 *  5. Return the bucket's edge offset plus the number of nonzero 2-bit
 *     entries between the start of the bucket and the selected vertex.
 */
static inline int64_t mph_lookup(const mph *mph, const uint64_t *signature) {
    const int bucket = ((__uint128_t)(signature[0] >> 1) * (__uint128_t)mph->multiplier) >> 64;
    const uint64_t edge_offset_seed = mph->edge_offset_and_seed[bucket];
    const uint64_t bucket_offset = vertex_offset(edge_offset_seed);
    const int num_variables = vertex_offset(mph->edge_offset_and_seed[bucket + 1]) - bucket_offset;

    int e[3];
    signature_to_equation(signature, edge_offset_seed & ~OFFSET_MASK, num_variables, e);

    const int v0 = get_2bit_value(mph->array, e[0] + bucket_offset);
    const int v1 = get_2bit_value(mph->array, e[1] + bucket_offset);
    const int v2 = get_2bit_value(mph->array, e[2] + bucket_offset);
    const int selected = e[(v0 + v1 + v2) % 3];

    return (edge_offset_seed & OFFSET_MASK) + count_nonzero_pairs(bucket_offset, bucket_offset + selected, mph->array);
}

/*
 * Look up a key given as len bytes starting at key.
 * The key is hashed with spooky_short using mph->global_seed.
 */
int64_t mph_get_byte_array(const mph *mph, char *key, uint64_t len) {
    uint64_t signature[4];
    spooky_short(key, len, mph->global_seed, signature);
    return mph_lookup(mph, signature);
}

/*
 * Look up a 64-bit key. The 8 bytes of the key's in-memory representation
 * are hashed with spooky_short using mph->global_seed.
 */
int64_t mph_get_uint64_t(const mph *mph, const uint64_t key) {
    uint64_t signature[4];
    spooky_short(&key, 8, mph->global_seed, signature);
    return mph_lookup(mph, signature);
}

/*
 * Look up a 128-bit key. The 16 bytes of the key's in-memory representation
 * are hashed with spooky_short using mph->global_seed.
 */
int64_t mph_get_uint128_t(const mph *mph, const __uint128_t key) {
    uint64_t signature[4];
    spooky_short(&key, 16, mph->global_seed, signature);
    return mph_lookup(mph, signature);
}
42 changes: 42 additions & 0 deletions src/main/c/mph.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Sux: Succinct data structures
*
* Copyright (C) 2018-2020 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

#ifndef MPH_H
#define MPH_H

#include <inttypes.h>

#ifdef USE_MMAP
#include <sys/mman.h>
#include <sys/resource.h>
/*
 * Replace calloc() with an anonymous private huge-page mapping
 * (30 << MAP_HUGE_SHIFT requests 2^30-byte pages). Code compiled with this
 * macro gets MAP_FAILED, not NULL, when the allocation fails, and the
 * returned memory must not be passed to free().
 */
#define calloc(n, size) mmap((void *)(0x0UL), (n) * (size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | (30 << MAP_HUGE_SHIFT), 0, 0)
#endif

/* In-memory form of a structure loaded by load_mph(). */
typedef struct {
	/* First 64-bit word of the serialized form. */
	uint64_t size;
	/* Scales the top 63 bits of a key hash to a bucket index. */
	uint64_t multiplier;
	/* Seed passed to the key hash. */
	uint64_t global_seed;
	/* Number of 64-bit words in edge_offset_and_seed. */
	uint64_t edge_offset_and_seed_length;
	/* Per-bucket word: edge offset in the low 56 bits, seed in the top 8. */
	uint64_t *edge_offset_and_seed;
	/* Number of 64-bit words in array. */
	uint64_t array_length;
	/* Packed 2-bit values, 32 per word. */
	uint64_t *array;
} mph;

/* Read a serialized structure from the file descriptor h. */
mph *load_mph(int h);
/* Look up a key given as len bytes starting at key. */
int64_t mph_get_byte_array(const mph *mph, char *key, uint64_t len);
/* Look up a 64-bit key. */
int64_t mph_get_uint64_t(const mph *mph, uint64_t key);
/* Look up a 128-bit key. */
int64_t mph_get_uint128_t(const mph *mph, __uint128_t key);

#endif /* MPH_H */
Loading

0 comments on commit 2bb64f3

Please sign in to comment.