/* * Copyright 2005-2009 Nicolas Bernard * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* sha2 requires openssl >= 0.9.8 */ /* sha1 is currently used */ #include #include #include #include #include #include #include #include #include #include "bloom.h" #ifdef WITH_DATALOC #include "dataloc.h" #endif #include "debug.h" #define BLOOMSINITSIZE 100 #define SALTSIZE 64 struct bloom_internal { unsigned long size; unsigned char *filter; unsigned int nbhashes; unsigned char *salt; }; static int initialized = 0; static unsigned int bloomssize = 0; static struct bloom_internal **blooms = NULL; static unsigned char* openssl_prandbuf(unsigned int size) { unsigned char* buf = NULL; BIGNUM* rnd = BN_new(); if (rnd == NULL) return NULL; buf = (unsigned char*) malloc(size); if (buf == NULL) {BN_free(rnd); return NULL;} if (!BN_rand(rnd, size * 8, -1, 0)) { free(buf); return NULL; } BN_bn2bin(rnd, buf); BN_free(rnd); return buf; } static int existbloom(int f) { if (!initialized) return 0; if (f < 0) return 0; if ((unsigned int) f >= bloomssize) return 0; if (blooms[f] == NULL) return 0; return 1; } static int initblooms(void) { if (initialized || bloomssize || blooms != NULL) return -1; blooms = (struct bloom_internal**) calloc(BLOOMSINITSIZE, sizeof(struct bloom_internal*)); if (blooms == NULL) return -1; bloomssize = BLOOMSINITSIZE; initialized = 1; return 0; } static int resizeblooms(void) { unsigned int i = 0; struct bloom_internal** blooms_tmp = NULL; if (!initialized || !bloomssize || blooms == NULL) return -1; blooms_tmp = (struct bloom_internal**) realloc(blooms, 2 * bloomssize * sizeof(struct bloom_internal*)); if (blooms_tmp == NULL) return -1; blooms = blooms_tmp; for (i = bloomssize; i < 2 * bloomssize; i++) blooms[i] = NULL; bloomssize *= 2; return 0; } int newbloom(unsigned long bsize, unsigned int nbhashes, unsigned int salted) { unsigned short i = 0; if (!initialized) if (initblooms()) return -1; for (i = 0; i < bloomssize; i++) if (blooms[i] == NULL) break; if (i == bloomssize) { if (resizeblooms()) return -1; } if (salted != 0 && salted != 1) return -7; if (nbhashes < 1 || nbhashes > 16) return -7; { /* we can now add a bloom at blooms[i] */ unsigned int j = 0; unsigned long size = 0xDeadBeef; unsigned char *tmp = (unsigned char*) 0xDeadBeef; unsigned char *tmpsalt = NULL; struct bloom_internal* tmpb = (struct bloom_internal*) 0xDeadBeef; size = bsize / 8; if (bsize % 8) size++; assert(sizeof(unsigned char) == 1); tmp = (unsigned char*) malloc(size); if (tmp == NULL) return -3; tmpb = (struct bloom_internal*) malloc(sizeof(struct bloom_internal)); if (tmpb == NULL) { free(tmp); return -3; } if (salted != 0) { tmpsalt = openssl_prandbuf(SALTSIZE); if (tmpsalt == NULL) { free(tmp); free(tmpb); return -3; } } for (j = 0; j < size; j++) tmp[j] = 0; tmpb->filter = tmp; tmpb->size = size; tmpb->nbhashes = nbhashes; tmpb->salt = tmpsalt; blooms[i] = tmpb; } return (int) i; } int destroybloom(int bd) { if (!existbloom(bd)) return -2; { /* destroy blooms[bd] */ struct bloom_internal* tmp = blooms[bd]; blooms[bd] = NULL; free(tmp->filter); if (tmp->salt != NULL) { memset(tmp->salt, 0, 16); free(tmp->salt); } free(tmp); } return 0; } static inline unsigned char mask(unsigned int shift) { assert(shift < 8); switch(shift) { case 0: return 1; case 1: return 2; case 2: return 4; case 3: return 8; case 4: return 16; case 5: return 32; case 6: return 64; case 7: return 128; } panic("aborting, shift: %u", shift); } //static const unsigned char mask[] = {1, 2, 4, 8, 16, 32, 64, 128}; static inline int touchpos(int bd, unsigned long int pos) { unsigned long part = pos / 8; unsigned int shift = pos % 8; if (blooms[bd]->size <= part) return -4; blooms[bd]->filter[part] |= mask(shift); return 0; } static inline int issetpos(int bd, unsigned long int pos) { if (!existbloom(bd)) return -1; { unsigned long part = pos / 8; unsigned int shift = pos % 8; if (blooms[bd]->size <= part) return -4; if (blooms[bd]->filter[part] & mask(shift)) return 1; } return 0; } void displayhash(struct blhashes *blh, FILE* out) { assert(blh != NULL); assert(out != NULL); fprintf(out, "[%d][%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:" "%lX:%lX:%lX:%lX]\n", blh->bloomid, blh->hashes[0], blh->hashes[1], blh->hashes[2], blh->hashes[3], blh->hashes[4], blh->hashes[5], blh->hashes[6], blh->hashes[7], blh->hashes[8], blh->hashes[9], blh->hashes[10], blh->hashes[11], blh->hashes[12], blh->hashes[13], blh->hashes[14], blh->hashes[15]); } static int hash(int bd, unsigned long int hashes[16], const void* data, unsigned long datasize) { unsigned char hash[64]; /* Flawfinder: ignore */ unsigned char tmp[20+SALTSIZE]; /* Flawfinder: ignore */ assert(blooms[bd]->nbhashes >= 1 && blooms[bd]->nbhashes < 17); if (data == NULL || datasize == 0) return -1; /* SHA512(data, datasize, hash); */ SHA1(data, datasize, tmp + SALTSIZE); if (blooms[bd]->salt != NULL) { /* Flawfinder: ignore */ memcpy(tmp, blooms[bd]->salt, SALTSIZE); SHA1(tmp, SALTSIZE+20, hash); } else SHA1(tmp + SALTSIZE, 20, hash); SHA1(hash, 20, hash + 20); SHA1(hash + 20, 20, hash + 40); SHA1(hash + 10, 20, hash + 44); #if 0 /* verify assumption on data sizes */ assert(sizeof(unsigned long int) * 16 == sizeof hash); /* Flawfinder: ignore */ memcpy(hashes, hash, sizeof hash); #else /* this works even if unsigned long int are more than 32 bits long. While they will then be only half initialized, this should not be an issue, unless we have bloom filters bigger than 4Gio. */ for (int i = 0; i < 16; i++) { hashes[i] = 0; hashes[i] = ((uint32_t*) hash)[i]; } #endif return 0; } struct blhashes* bloomhash(int bd, struct blhashes* blh, const void* data, unsigned long datasize) { if (!existbloom(bd)) return NULL; if (blh == NULL) blh = (struct blhashes*) calloc(1, sizeof(struct blhashes)); if (blh == NULL) return NULL; blh->bloomid = bd; if (hash(bd, blh->hashes, data, datasize) == 0) return blh; return NULL; } int add(int bd, const void* data, unsigned long datasize) { unsigned int i = 0; unsigned long int hashes[16]; if (!existbloom(bd)) return -1; if (hash(bd, hashes, data, datasize) != 0) return -5; for (i = 0; i < blooms[bd]->nbhashes; i++) { int ret = touchpos(bd, hashes[i] % (blooms[bd]->size * 8)); if (ret != 0) return ret; } return 0; } int addhash(int bd, const struct blhashes *blh) { unsigned int i = 0; assert(blh != NULL); if (!existbloom(bd)) return -1; if (blh->bloomid != bd) return -2; for (i = 0; i < blooms[bd]->nbhashes; i++) { int ret = touchpos(bd, blh->hashes[i] % (blooms[bd]->size * 8)); if (ret != 0) return ret; } return 0; } int isin(int bd, const void* data, unsigned long datasize) { unsigned int i = 0; unsigned long int hashes[16]; if (!existbloom(bd)) { stkdbg("Invalid bloom descriptor! (%d)", bd); return -1; } if (hash(bd, hashes, data, datasize) != 0) { stkdbg("bloom hash failed"); return -5; } for (i = 0; i < blooms[bd]->nbhashes; i++) { int ret = issetpos(bd, hashes[i] % (blooms[bd]->size * 8)); if (ret != 1) return ret; } return 1; } int isinbloom(int bd, const struct blhashes *blh) { unsigned int i = 0; assert(blh != NULL); if (!existbloom(bd)) { stkdbg("Invalid bloom descriptor! (%d)", bd); return -1; } if (blh->bloomid != bd) { stkdbg("Error, comparison with data of another hash! (%d, %d)", blh->bloomid, bd); return -2; } for (i = 0; i < blooms[bd]->nbhashes; i++) { int ret = issetpos(bd, blh->hashes[i] % (blooms[bd]->size * 8)); if (ret != 1) return ret; } return 1; } #ifdef WITH_DATALOC int bloompack(struct dataloc *loc, int bd) { uint8_t isdata = 1; if (!existbloom(bd)) { isdata = 0; } int err = writeloc(loc, &isdata, sizeof isdata); if (err < 0) { debug("writeloc: %d %s", err, (err == -2 || err == -3 || err == -20) ? strerror(errno) : ""); return -10; } if (!isdata) return 0; debug("will write filter of size %lu", blooms[bd]->size); err = writeloc(loc, &blooms[bd]->size, sizeof blooms[bd]->size); if (err < 0) { debug("writeloc: %d %s", err, (err == -2 || err == -3 || err == -20) ? strerror(errno) : ""); return -20; } err = writeloc(loc, blooms[bd]->filter, blooms[bd]->size); if (err < 0) { debug("writeloc: %d %s", err, (err == -2 || err == -3 || err == -20) ? strerror(errno) : ""); return -30; } err = writeloc(loc, &blooms[bd]->nbhashes, sizeof blooms[bd]->nbhashes); if (err < 0) { debug("writeloc: %d %s", err, (err == -2 || err == -3 || err == -20) ? strerror(errno) : ""); return -40; } err = writeloc(loc, blooms[bd]->salt, SALTSIZE); if (err < 0) { debug("writeloc: %d %s", err, (err == -2 || err == -3 || err == -20) ? strerror(errno) : ""); return -50; } return 0; } int bloomunpack(struct dataloc *loc) { int err = 0; unsigned int i = 0; if (!initialized) if (initblooms()) return -1; uint8_t isdata = 123; err = readloc(loc, &isdata, sizeof isdata); if (err < 0) return -3; if (isdata == 0) return 0; if (isdata != 1) return -5; for (i = 0; i < bloomssize; i++) if (blooms[i] == NULL) break; if (i == bloomssize) { if (resizeblooms()) return -10; } struct bloom_internal* tmpb = (struct bloom_internal*) 0xDeadBeef; tmpb = (struct bloom_internal*) malloc(sizeof(struct bloom_internal)); if (tmpb == NULL) return -20; err = readloc(loc, &tmpb->size, sizeof tmpb->size); if (err < 0) { free(tmpb); return -30; } debug("will read filter of size %lu", tmpb->size); tmpb->filter = (unsigned char*) malloc(tmpb->size); if (tmpb->filter == NULL) { free(tmpb); return -40; } tmpb->salt = (unsigned char*) malloc(SALTSIZE); if (tmpb->salt == NULL) { free(tmpb->filter); free(tmpb); return -45; } err = readloc(loc, tmpb->filter, tmpb->size); if (err < 0) { free(tmpb->filter); free(tmpb->salt); free(tmpb); return -50; } err = readloc(loc, &tmpb->nbhashes, sizeof tmpb->nbhashes); if (err < 0) { free(tmpb->filter); free(tmpb->salt); free(tmpb); return -60; } err = readloc(loc, tmpb->salt, SALTSIZE); if (err < 0) { free(tmpb->filter); free(tmpb->salt); free(tmpb); return -70; } blooms[i] = tmpb; return (int) i; } #endif /* WITH_DATALOC */