Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] partial impl of poc APIs for hashmap #360

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/data_structure/hashmap/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
add_library(ds_hashmap hashmap.c)
221 changes: 221 additions & 0 deletions src/data_structure/hashmap/hashmap.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
#include "hashmap.h"

#include <cc_debug.h>


/* First byte of the body, located right after the fixed-size hashmap header. */
#define HM_BODY(_hm) ((char *)(_hm) + HASHMAP_HEADER_SIZE)
/* One past the last byte of the body. Expands its own argument (_hm) so it
 * works regardless of what the caller's variable is named.
 */
#define HM_END(_hm) ((char *)(_hm) + HASHMAP_HEADER_SIZE + HM_NBODY(_hm))


/* Key length of an entry: the first byte of the entry header. */
static inline uint8_t
_entry_klen(char *entry)
{
    const uint8_t *hdr = (const uint8_t *)entry;

    return hdr[0];
}

/* Value length of an entry: the second byte of the entry header. */
static inline uint8_t
_entry_vlen(char *entry)
{
    const uint8_t *hdr = (const uint8_t *)entry;

    return hdr[1];
}

/* Total footprint of an entry in bytes: entry header + key + value. */
static inline uint32_t
_entry_nbyte(char *entry)
{
    uint32_t total = (uint32_t)HASHMAP_ENTRY_HEADER_SIZE;

    total += _entry_klen(entry);
    total += _entry_vlen(entry);

    return total;
}

/* Start of the key bytes, which sit immediately after the entry header. */
static inline char *
_entry_key(char *entry)
{
    return &entry[HASHMAP_ENTRY_HEADER_SIZE];
}

/* Start of the value bytes, which follow the entry header and the key. */
static inline char *
_entry_val(char *entry)
{
    char *key_start = entry + HASHMAP_ENTRY_HEADER_SIZE;

    return key_start + _entry_klen(entry);
}

/* Serialize key and val into the entry starting at `entry`:
 * <klen:1 byte><vlen:1 byte><key bytes><val bytes>.
 * The lengths are stored in a single byte each, so the caller has to make
 * sure both fit before calling.
 */
static inline void
_entry_set(char *entry, const struct bstring *key, const struct bstring *val)
{
    uint8_t *hdr = (uint8_t *)entry;
    char *kdst = entry + HASHMAP_ENTRY_HEADER_SIZE;
    char *vdst = kdst + key->len;

    hdr[0] = key->len;
    hdr[1] = val->len;
    cc_memcpy(kdst, key->data, key->len);
    cc_memcpy(vdst, val->data, val->len);
}

/* Return the address right after `entry`, i.e. where the following entry
 * starts (or the end of the body when `entry` is the last one).
 */
static inline char *
_next_entry(char *entry)
{
    return entry + _entry_nbyte(entry);
}


/* returns true if an exact match is found, false otherwise.
* If a match is found, the position of the entry element is stored in pos;
* otherwise, pos contains the position of the insertion spot
*/
static inline bool
_locate(char **pos, uint32_t *idx, const char *entry, uint32_t nentry, struct bstring *key)
{
uint32_t i;
int bcmp_sgn;
int eklen_sgn;

ASSERT(idx != NULL);

if (nentry == 0) {
return false;
}

for (*pos = entry; *idx < nentry; *idx += 1) {
uint8_t eklen = _entry_klen(*pos);
bcmp_sgn = cc_bcmp(key->data, _entry_key(*pos), MIN(key->len, eklen));
if (bcmp_sgn > 0) { /* no match, insert position found */
return false;
}
if (bcmp_sign < 0) { /* no match, next entry */
*pos = _next_entry(*pos);
continue;
}

/* bcmp_sign == 0, may need to look at length. A shorter key is smaller */

/* -1: key is shorter than eklen; 0: equal length; 1: key is longer */
eklen_sgn = (key->len > eklen) - (eklen > key->len);
if (eklen_sgn > 0) { /* no match, insert position found */
return false;
}
if (eklen_sgn < 0) { /* no match, next entry */
*pos = _next_entry(*pos);
continue;
}

return true; /* match iff bcmp_sgn and eklen_sgn are both 0 */
}

return false; /* *pos points to the end of body */
}


/* Initialize the hashmap header in the buffer `hm` to describe an empty map:
 * zero entries and a zero-byte body.
 *
 * Returns HASHMAP_ERROR if hm is NULL, HASHMAP_OK otherwise.
 */
hashmap_rstatus_e
hashmap_init(hashmap_p hm)
{
    if (hm == NULL) {
        log_debug("NULL pointer encountered for hm");

        return HASHMAP_ERROR;
    }

    HM_NENTRY(hm) = 0;
    HM_NBODY(hm) = 0;

    return HASHMAP_OK;
}


/* Look up `key` in `hm`.
 *
 * On a hit, val->data points at the value bytes stored inside the hashmap
 * (no copy is made), val->len is the value length, and HASHMAP_OK is
 * returned. On a miss, val is reset to an empty bstring (len 0, data NULL)
 * and HASHMAP_ENOTFOUND is returned. HASHMAP_ERROR is returned if any
 * argument is NULL.
 */
hashmap_rstatus_e
hashmap_get(struct bstring *val, const hashmap_p hm, const struct bstring *key)
{
    uint32_t idx = 0;
    char *entry = NULL;

    if (key == NULL || val == NULL || hm == NULL) {
        log_debug("NULL pointer encountered for hm %p, key %p, or val %p", hm,
                key, val);

        return HASHMAP_ERROR;
    }

    if (!_locate(&entry, &idx, HM_BODY(hm), hashmap_nentry(hm), key)) {
        val->len = 0;
        val->data = NULL;

        return HASHMAP_ENOTFOUND;
    }

    val->len = _entry_vlen(entry);
    val->data = _entry_val(entry);

    return HASHMAP_OK;
}


/* Look up `cardinality` keys in `hm`.
 *
 * For every k in [0, cardinality), val[k] is filled in the same way
 * hashmap_get fills its result: on a hit it references the value bytes stored
 * inside the hashmap, on a miss it is reset to len 0 / data NULL.
 *
 * Returns the number of keys found. If hm, key or val is NULL nothing is
 * looked up and 0 is returned (the return value is a count, so a status code
 * cannot be reported through it).
 *
 * Every key is searched from the start of the body, which is correct for
 * keys in any order.
 * TODO: when the caller guarantees keys are sorted the same way as the
 * entries, the scan could resume from the previous position instead.
 */
uint32_t
hashmap_multiget(struct bstring *val[], const hashmap_p hm, const struct bstring *key[], uint32_t cardinality)
{
    uint32_t k, idx, nentry, nfound;
    char *entry;

    if (key == NULL || val == NULL || hm == NULL) {
        log_debug("NULL pointer encountered for hm %p, key %p, or val %p", hm,
                key, val);

        return 0;
    }

    nentry = hashmap_nentry(hm);
    nfound = 0;
    for (k = 0; k < cardinality; k++) {
        /* restart the scan for each key: index and position must agree */
        idx = 0;
        entry = HM_BODY(hm);

        if (_locate(&entry, &idx, HM_BODY(hm), nentry, key[k])) { /* found */
            val[k]->len = _entry_vlen(entry);
            val[k]->data = _entry_val(entry);
            nfound++;
        } else {
            val[k]->len = 0;
            val[k]->data = NULL;
        }
    }

    return nfound;
}


/* Insert the (key, val) pair into `hm`, keeping entries in sorted order.
 *
 * Returns:
 *   HASHMAP_ERROR    if hm, key or val is NULL
 *   HASHMAP_EINVALID if key or val is longer than UINT8_MAX bytes
 *   HASHMAP_EDUP     if an entry with the same key already exists
 *   HASHMAP_OK       on success
 *
 * NOTE(review): the header carries no capacity information, so this function
 * cannot check that the buffer behind hm has room for the new entry; the
 * caller presumably has to guarantee that.
 */
hashmap_rstatus_e
hashmap_insert(hashmap_p hm, const struct bstring *key, const struct bstring *val)
{
    char *entry, *end;
    uint32_t idx, nbyte;

    if (hm == NULL || key == NULL || val == NULL) {
        log_debug("NULL pointer encountered for hm %p, key %p, or val %p", hm,
                key, val);

        return HASHMAP_ERROR;
    }

    if (key->len > UINT8_MAX || val->len > UINT8_MAX) {
        log_debug("key / value size too big for current hashmap implementation:"
                "key size: %"PRIu32", val size: %"PRIu32". (Allowed: %"PRIu8")",
                key->len, val->len, UINT8_MAX);

        return HASHMAP_EINVALID;
    }

    /* the scan starts at the first entry; entry defaults to the body start so
     * it is a valid insertion spot even when the map is empty */
    idx = 0;
    entry = HM_BODY(hm);

    if (_locate(&entry, &idx, HM_BODY(hm), hashmap_nentry(hm), key)) { /* found */
        return HASHMAP_EDUP;
    }

    nbyte = (uint32_t)HASHMAP_ENTRY_HEADER_SIZE + key->len + val->len;
    end = HM_END(hm);

    /* open a gap of nbyte bytes by shifting everything from the insertion
     * spot to the end of the body; nothing to move when appending */
    if (entry < end) {
        cc_memmove(entry + nbyte, entry, end - entry);
    }
    _entry_set(entry, key, val);

    HM_NENTRY(hm) += 1;
    HM_NBODY(hm) += nbyte;

    return HASHMAP_OK;
}
97 changes: 97 additions & 0 deletions src/data_structure/hashmap/hashmap.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#pragma once

/* This is an implementation of hashmaps with bounded but flexible entry size
* with binary field keys. The size of both field key and value are limited to
* 255 bytes in this POC.
*
* The fields are sorted but not indexed. This makes bulk lookup faster when the
* field (keys) are also sorted.
*
* ----------------------------------------------------------------------------
*
* HASHMAP OVERALL LAYOUT
* =====================
*
* The general layout of the hashmap is as follows:
* entry
* ╭------------------------╮
* <nentry><nbody><eklen><evlen><ekey><eval> ... <eklen><evlen><ekey><eval>
* ╰-------------╯╰-------------------------------------------------------╯
* header body
*
* Overhead: 8 bytes (two 32-bit integers)
*
* <uint32_t nentry> is the number of entries.
* <uint32_t nbody> is the number of bytes in the body (not including header).
*
*
*
* HASHMAP ENTRIES
* ==============
*
* For each entry:
* <uint8_t eklen> is the size of hash field in each entry (entry key)
* <uint8_t evlen> is the size of hash value in each entry (entry value)
*
* The rest of the entry is a tuple of a binary string (non-empty byte array)
* for field and a byte array for value.
*
* RUNTIME
* =======
*
* Entry lookup takes O(N) where N is the number of entries in the hashmap.
*
* Insertion and removal of entries involve scan-based lookup, as well as
* shifting data. So in addition to the considerations above, the amount of
* data being moved for updates will affect performance. Updates near the "fixed
* end" of the hashmap (currently the beginning) require moving more data and
* therefore will be slower. Overall, it is cheapest to perform updates at the
* end of the array due to zero data movement.
*
*/

#include <cc_bstring.h>

#include <stdint.h>

/* Size of the hashmap header: two uint32_t fields (nentry, nbody). */
#define HASHMAP_HEADER_SIZE (sizeof(uint32_t) + sizeof(uint32_t)) /* 8 */
/* Size of a per-entry header: two uint8_t fields (eklen, evlen). */
#define HASHMAP_ENTRY_HEADER_SIZE (sizeof(uint8_t) + sizeof(uint8_t)) /* 2 */

/* A hashmap handle is a pointer to the first byte of its header. */
typedef char * hashmap_p;

/* Status codes returned by the hashmap API. */
typedef enum {
HASHMAP_OK,
HASHMAP_ENOTFOUND, /* value not found error */
HASHMAP_EINVALID, /* invalid (entry) data error */
HASHMAP_EDUP, /* duplicate entry found */
HASHMAP_ERROR, /* generic failure, e.g. a NULL argument */
HASHMAP_SENTINEL /* presumably marks the end of the valid codes -- confirm */
} hashmap_rstatus_e;

/* Header field accessors; both expand to lvalues so they can be read and
 * assigned. nentry is the first uint32_t of the header, nbody the second.
 * The byte offset of nbody is applied to a char pointer so that the result
 * does not depend on the pointer type of the argument.
 */
#define HM_NENTRY(_hm) (*((uint32_t *)(_hm)))
#define HM_NBODY(_hm) (*((uint32_t *)((char *)(_hm) + sizeof(uint32_t))))

/* Number of entries currently stored, as recorded in the header. */
static inline uint32_t
hashmap_nentry(const hashmap_p hm)
{
    uint32_t count = HM_NENTRY(hm);

    return count;
}

/* Total number of bytes occupied by the hashmap: header plus body. */
static inline uint32_t
hashmap_size(const hashmap_p hm)
{
    uint32_t nbody = HM_NBODY(hm);

    return HASHMAP_HEADER_SIZE + nbody;
}

/* initialize an empty hashmap (zero entries, zero-byte body) in the buffer hm */
hashmap_rstatus_e hashmap_init(hashmap_p hm);

/* hashmap APIs: seek */
hashmap_rstatus_e hashmap_get(struct bstring *val, const hashmap_p hm, const struct bstring *key);
uint32_t hashmap_multiget(struct bstring *val[], const hashmap_p hm, const struct bstring *key[], uint32_t cardinality);

/* hashmap APIs: modify */
hashmap_rstatus_e hashmap_insert(hashmap_p hm, const struct bstring *key, const struct bstring *val);
hashmap_rstatus_e hashmap_bulk_insert(hashmap_p hm, const struct bstring *key, const struct bstring *val);
hashmap_rstatus_e hashmap_remove(hashmap_p hm, const struct bstring *key);
1 change: 1 addition & 0 deletions src/rust/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.vscode/