1: /*
2: * Copyright 2005-2009 Nicolas Bernard <http://www.lafraze.net/nbernard/>
3: *
4: * Permission to use, copy, modify, and distribute this software for any
5: * purpose with or without fee is hereby granted, provided that the above
6: * copyright notice and this permission notice appear in all copies.
7: *
8: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15: */
16:
17: #ifndef _BLOOM_H_
18: #define _BLOOM_H_
19:
20: /**
21: * newbloom create a new bloom filter.
22: * parameters:
23: * first: size of the filter in bits.
24: * second: number of hashes to use. must be in [1, 16]
25: * last: should we use a random salt for this filter?
26: * if set, it is harder for a malicious user to polute
27: * the filter. The drawback is that a same value won't
28: * be hashed in the same value in to different filter.
29: * returned value:
30: * an int which is a bloom filter descriptor or a value < 0
31: * in case of error.
32: *
33: * WARNING: OpenSSL should have been initialized beforehand.
34: */
35: extern int newbloom(unsigned long, unsigned int, unsigned int);
36:
37: /**
38: * destroybloom destroy a bloom filter.
39: * parameters:
40: * int bd, where bd is the descriptor of the filter to destroy
41: * returned value:
42: * 0 if successful, < 0 otherwise.
43: */
44: extern int destroybloom(int);
45:
46: /**
47: * add: add some data in a filter.
48: * parameters:
49: * int bd, the descriptor of the filter in which the data will be
50: * registered;
51: * void* data, a buffer with the data;
52: * unsigned long datasize, the size of the data buffer.
53: * returned value:
54: * 0 if successful, < 0 otherwise.
55: */
56: extern int add(int, const void*, unsigned long);
57:
58:
59: /**
60: * isin: match some data against a filter.
61: * parameters:
62: * int bd, the descriptor of the filter in which the data will be
63: * searched;
64: * void* data, a buffer with the data;
65: * unsigned long datasize, the size of the data buffer.
66: * returned value:
67: * 0 if the data match the filter, 0 if not, < 0 if an error occured
68: */
69: extern int isin(int, const void*, unsigned long);
70:
71:
72: /**
73: *
74: * The struct blhashes, combined with the bloomhash, addhash and isinbloom
75: * functions allow finer grained operations (compared to add and isin).
76: *
77: * nb: this should be considered as an opaque type.
78: *
79: */
80: struct blhashes {
81: int bloomid;
82: unsigned long int hashes[16];
83: };
84:
85: /**
86: * bloomhash: compute the hash of some data (for the given bloomfilter bd)
87: * and return it in a struct blhashes.
88: *
89: * parameters:
90: * int bd, the descriptor of the filter for which hash will be computed;
91: * struct blhashes* blh, the structure in which the hash will be stored.
92: * if NULL, it will be allocated dynamically.
93: * void* data, the data to be hashed. They are not modified;
94: * unsigned long datasize, the size of the data in octets.
95: *
96: * returned value:
97: */
98: extern struct blhashes* bloomhash(int bd, struct blhashes* blh, const void* data, unsigned long datasize);
99:
100: /**
101: * addhash: same as add, but for pre-hashed data.
102: *
103: * parameters:
104: * int bd, the descriptor of the filter in which the data will be
105: * registered;
106: * struct blhashes *blh, the hash of the data to be registered.
107: *
108: * returned value:
109: * 0 if all is well;
110: * <0 if an error occures (consult sources or guru).
111: */
112: extern int addhash(int bd, const struct blhashes *blh);
113:
114: /**
115: * isinbloom: takes a bloom descriptor and a struct blhashes and tell whether
116: * the corresponding data are already in the bloom filter or not.
117: *
118: * parameters:
119: * int bd, the bloom filter descriptor to search;
120: * struct blhashes *blh, the hash of the data to search for.
121: *
122: * returned value:
123: * < 0 in case of error;
124: * 0 if there is no match;
125: * 1 if there is a match (remember, this is probabilistic!).
126: */
127: extern int isinbloom(int bd, const struct blhashes *blh);
128:
129: /**
130: * displayhash: (for debug purpose only): displays the content of a
131: * struct blhashes.
132: * parameters:
133: * struct blhashes *blh, the structure to display;
134: * FILE* out, the stream on which the data have to be sent.
135: */
136: extern void displayhash(struct blhashes *blh, FILE* out);
137:
138: #ifdef WITH_DATALOC
139:
140: #include "dataloc.h"
141:
142: extern int bloompack(struct dataloc *loc, int bd);
143: extern int bloomunpack(struct dataloc *loc);
144:
145: #endif /* WITH_DATALOC */
146:
147: #endif /* _BLOOM_H_ */
1: /*
2: * Copyright 2005-2009 Nicolas Bernard <http://www.lafraze.net/nbernard/>
3: *
4: * Permission to use, copy, modify, and distribute this software for any
5: * purpose with or without fee is hereby granted, provided that the above
6: * copyright notice and this permission notice appear in all copies.
7: *
8: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15: */
16:
17: /* sha2 requires openssl >= 0.9.8 */
18: /* sha1 is currently used */
19:
20: #include <openssl/rand.h>
21: #include <openssl/bn.h>
22: #include <openssl/dh.h>
23: #include <openssl/sha.h>
24: #include <assert.h>
25: #include <errno.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
30: #include "bloom.h"
31: #ifdef WITH_DATALOC
32: #include "dataloc.h"
33: #endif
34: #include "debug.h"
35:
36: #define BLOOMSINITSIZE 100
37: #define SALTSIZE 64
38:
39: struct bloom_internal {
40: unsigned long size;
41: unsigned char *filter;
42: unsigned int nbhashes;
43: unsigned char *salt;
44: };
45:
46: static int initialized = 0;
47: static unsigned int bloomssize = 0;
48: static struct bloom_internal **blooms = NULL;
49:
50: static unsigned char*
51: openssl_prandbuf(unsigned int size)
52: {
53: unsigned char* buf = NULL;
54: BIGNUM* rnd = BN_new();
55: if (rnd == NULL) return NULL;
56:
57: buf = (unsigned char*) malloc(size);
58: if (buf == NULL) {BN_free(rnd); return NULL;}
59:
60: if (!BN_rand(rnd, size * 8, -1, 0)) {
61: free(buf);
62: return NULL;
63: }
64:
65: BN_bn2bin(rnd, buf);
66: BN_free(rnd);
67: return buf;
68: }
69:
70: static int
71: existbloom(int f)
72: {
73: if (!initialized)
74: return 0;
75: if (f < 0)
76: return 0;
77: if ((unsigned int) f >= bloomssize)
78: return 0;
79: if (blooms[f] == NULL)
80: return 0;
81: return 1;
82: }
83:
84: static int
85: initblooms(void)
86: {
87: if (initialized || bloomssize || blooms != NULL)
88: return -1;
89: blooms = (struct bloom_internal**)
90: calloc(BLOOMSINITSIZE, sizeof(struct bloom_internal*));
91: if (blooms == NULL)
92: return -1;
93: bloomssize = BLOOMSINITSIZE;
94: initialized = 1;
95: return 0;
96: }
97:
98: static int
99: resizeblooms(void)
100: {
101: unsigned int i = 0;
102: struct bloom_internal** blooms_tmp = NULL;
103: if (!initialized || !bloomssize || blooms == NULL)
104: return -1;
105: blooms_tmp = (struct bloom_internal**)
106: realloc(blooms,
107: 2 * bloomssize * sizeof(struct bloom_internal*));
108: if (blooms_tmp == NULL)
109: return -1;
110: blooms = blooms_tmp;
111: for (i = bloomssize; i < 2 * bloomssize; i++)
112: blooms[i] = NULL;
113: bloomssize *= 2;
114: return 0;
115: }
116:
117: int
118: newbloom(unsigned long bsize, unsigned int nbhashes, unsigned int salted)
119: {
120: unsigned short i = 0;
121: if (!initialized)
122: if (initblooms())
123: return -1;
124: for (i = 0; i < bloomssize; i++)
125: if (blooms[i] == NULL)
126: break;
127: if (i == bloomssize) {
128: if (resizeblooms())
129: return -1;
130: }
131: if (salted != 0 && salted != 1)
132: return -7;
133: if (nbhashes < 1 || nbhashes > 16)
134: return -7;
135: { /* we can now add a bloom at blooms[i] */
136: unsigned int j = 0;
137: unsigned long size = 0xDeadBeef;
138: unsigned char *tmp = (unsigned char*) 0xDeadBeef;
139: unsigned char *tmpsalt = NULL;
140: struct bloom_internal* tmpb =
141: (struct bloom_internal*) 0xDeadBeef;
142: size = bsize / 8;
143: if (bsize % 8)
144: size++;
145: assert(sizeof(unsigned char) == 1);
146: tmp = (unsigned char*) malloc(size);
147: if (tmp == NULL)
148: return -3;
149: tmpb = (struct bloom_internal*)
150: malloc(sizeof(struct bloom_internal));
151: if (tmpb == NULL) {
152: free(tmp);
153: return -3;
154: }
155: if (salted != 0) {
156: tmpsalt = openssl_prandbuf(SALTSIZE);
157: if (tmpsalt == NULL) {
158: free(tmp);
159: free(tmpb);
160: return -3;
161: }
162: }
163:
164: for (j = 0; j < size; j++)
165: tmp[j] = 0;
166: tmpb->filter = tmp;
167: tmpb->size = size;
168: tmpb->nbhashes = nbhashes;
169: tmpb->salt = tmpsalt;
170: blooms[i] = tmpb;
171: }
172: return (int) i;
173: }
174:
175: int
176: destroybloom(int bd)
177: {
178: if (!existbloom(bd))
179: return -2;
180: { /* destroy blooms[bd] */
181: struct bloom_internal* tmp = blooms[bd];
182: blooms[bd] = NULL;
183: free(tmp->filter);
184: if (tmp->salt != NULL) {
185: memset(tmp->salt, 0, 16);
186: free(tmp->salt);
187: }
188: free(tmp);
189: }
190: return 0;
191: }
192:
193: static inline unsigned char
194: mask(unsigned int shift)
195: {
196: assert(shift < 8);
197: switch(shift) {
198: case 0: return 1;
199: case 1: return 2;
200: case 2: return 4;
201: case 3: return 8;
202: case 4: return 16;
203: case 5: return 32;
204: case 6: return 64;
205: case 7: return 128;
206: }
207: panic("aborting, shift: %u", shift);
208: }
209:
210: //static const unsigned char mask[] = {1, 2, 4, 8, 16, 32, 64, 128};
211:
212: static inline int
213: touchpos(int bd, unsigned long int pos)
214: {
215: unsigned long part = pos / 8;
216: unsigned int shift = pos % 8;
217: if (blooms[bd]->size <= part)
218: return -4;
219: blooms[bd]->filter[part] |= mask(shift);
220: return 0;
221: }
222:
223: static inline int
224: issetpos(int bd, unsigned long int pos)
225: {
226: if (!existbloom(bd))
227: return -1;
228: {
229: unsigned long part = pos / 8;
230: unsigned int shift = pos % 8;
231: if (blooms[bd]->size <= part)
232: return -4;
233: if (blooms[bd]->filter[part] & mask(shift))
234: return 1;
235: }
236: return 0;
237: }
238:
239: void
240: displayhash(struct blhashes *blh, FILE* out)
241: {
242: assert(blh != NULL);
243: assert(out != NULL);
244: fprintf(out, "[%d][%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:%lX:"
245: "%lX:%lX:%lX:%lX]\n", blh->bloomid,
246: blh->hashes[0], blh->hashes[1], blh->hashes[2], blh->hashes[3],
247: blh->hashes[4], blh->hashes[5], blh->hashes[6], blh->hashes[7],
248: blh->hashes[8], blh->hashes[9], blh->hashes[10],
249: blh->hashes[11], blh->hashes[12], blh->hashes[13],
250: blh->hashes[14], blh->hashes[15]);
251: }
252:
253: static int
254: hash(int bd, unsigned long int hashes[16], const void* data,
255: unsigned long datasize)
256: {
257: unsigned char hash[64]; /* Flawfinder: ignore */
258: unsigned char tmp[20+SALTSIZE]; /* Flawfinder: ignore */
259: assert(blooms[bd]->nbhashes >= 1 && blooms[bd]->nbhashes < 17);
260: if (data == NULL || datasize == 0)
261: return -1;
262: /* SHA512(data, datasize, hash); */
263: SHA1(data, datasize, tmp + SALTSIZE);
264: if (blooms[bd]->salt != NULL) {
265: /* Flawfinder: ignore */
266: memcpy(tmp, blooms[bd]->salt, SALTSIZE);
267: SHA1(tmp, SALTSIZE+20, hash);
268: } else
269: SHA1(tmp + SALTSIZE, 20, hash);
270: SHA1(hash, 20, hash + 20);
271: SHA1(hash + 20, 20, hash + 40);
272: SHA1(hash + 10, 20, hash + 44);
273:
274: #if 0
275: /* verify assumption on data sizes */
276: assert(sizeof(unsigned long int) * 16 == sizeof hash);
277: /* Flawfinder: ignore */
278: memcpy(hashes, hash, sizeof hash);
279: #else
280: /* this works even if unsigned long int are more than 32 bits long.
281: While they will then be only half initialized, this should not be
282: an issue, unless we have bloom filters bigger than 4Gio. */
283: for (int i = 0; i < 16; i++) {
284: hashes[i] = 0;
285: hashes[i] = ((uint32_t*) hash)[i];
286: }
287: #endif
288:
289: return 0;
290: }
291:
292: struct blhashes*
293: bloomhash(int bd, struct blhashes* blh, const void* data, unsigned long datasize)
294: {
295: if (!existbloom(bd))
296: return NULL;
297: if (blh == NULL)
298: blh = (struct blhashes*) calloc(1, sizeof(struct blhashes));
299: if (blh == NULL)
300: return NULL;
301: blh->bloomid = bd;
302: if (hash(bd, blh->hashes, data, datasize) == 0)
303: return blh;
304: return NULL;
305: }
306:
307: int
308: add(int bd, const void* data, unsigned long datasize)
309: {
310: unsigned int i = 0;
311: unsigned long int hashes[16];
312: if (!existbloom(bd))
313: return -1;
314: if (hash(bd, hashes, data, datasize) != 0)
315: return -5;
316: for (i = 0; i < blooms[bd]->nbhashes; i++) {
317: int ret = touchpos(bd, hashes[i] % (blooms[bd]->size * 8));
318: if (ret != 0)
319: return ret;
320: }
321: return 0;
322: }
323:
324: int
325: addhash(int bd, const struct blhashes *blh)
326: {
327: unsigned int i = 0;
328: assert(blh != NULL);
329: if (!existbloom(bd))
330: return -1;
331: if (blh->bloomid != bd)
332: return -2;
333: for (i = 0; i < blooms[bd]->nbhashes; i++) {
334: int ret = touchpos(bd, blh->hashes[i] % (blooms[bd]->size * 8));
335: if (ret != 0)
336: return ret;
337: }
338: return 0;
339: }
340:
341: int
342: isin(int bd, const void* data, unsigned long datasize)
343: {
344: unsigned int i = 0;
345: unsigned long int hashes[16];
346: if (!existbloom(bd)) {
347: stkdbg("Invalid bloom descriptor! (%d)", bd);
348: return -1;
349: }
350: if (hash(bd, hashes, data, datasize) != 0) {
351: stkdbg("bloom hash failed");
352: return -5;
353: }
354: for (i = 0; i < blooms[bd]->nbhashes; i++) {
355: int ret = issetpos(bd, hashes[i] % (blooms[bd]->size * 8));
356: if (ret != 1)
357: return ret;
358: }
359: return 1;
360: }
361:
362: int
363: isinbloom(int bd, const struct blhashes *blh)
364: {
365: unsigned int i = 0;
366: assert(blh != NULL);
367: if (!existbloom(bd)) {
368: stkdbg("Invalid bloom descriptor! (%d)", bd);
369: return -1;
370: }
371: if (blh->bloomid != bd) {
372: stkdbg("Error, comparison with data of another hash! (%d, %d)",
373: blh->bloomid, bd);
374: return -2;
375: }
376: for (i = 0; i < blooms[bd]->nbhashes; i++) {
377: int ret = issetpos(bd, blh->hashes[i] % (blooms[bd]->size * 8));
378: if (ret != 1)
379: return ret;
380: }
381: return 1;
382: }
383:
384: #ifdef WITH_DATALOC
385:
386: int
387: bloompack(struct dataloc *loc, int bd)
388: {
389: uint8_t isdata = 1;
390: if (!existbloom(bd)) {
391: isdata = 0;
392: }
393:
394: int err = writeloc(loc, &isdata, sizeof isdata);
395: if (err < 0) {
396: debug("writeloc: %d %s", err,
397: (err == -2 || err == -3 || err == -20) ?
398: strerror(errno) : "");
399: return -10;
400: }
401: if (!isdata)
402: return 0;
403:
404: debug("will write filter of size %lu", blooms[bd]->size);
405: err = writeloc(loc, &blooms[bd]->size, sizeof blooms[bd]->size);
406: if (err < 0) {
407: debug("writeloc: %d %s", err,
408: (err == -2 || err == -3 || err == -20) ?
409: strerror(errno) : "");
410: return -20;
411: }
412: err = writeloc(loc, blooms[bd]->filter, blooms[bd]->size);
413: if (err < 0) {
414: debug("writeloc: %d %s", err,
415: (err == -2 || err == -3 || err == -20) ?
416: strerror(errno) : "");
417: return -30;
418: }
419: err = writeloc(loc, &blooms[bd]->nbhashes, sizeof blooms[bd]->nbhashes);
420: if (err < 0) {
421: debug("writeloc: %d %s", err,
422: (err == -2 || err == -3 || err == -20) ?
423: strerror(errno) : "");
424: return -40;
425: }
426: err = writeloc(loc, blooms[bd]->salt, SALTSIZE);
427: if (err < 0) {
428: debug("writeloc: %d %s", err,
429: (err == -2 || err == -3 || err == -20) ?
430: strerror(errno) : "");
431: return -50;
432: }
433: return 0;
434: }
435:
436: int
437: bloomunpack(struct dataloc *loc)
438: {
439: int err = 0;
440: unsigned int i = 0;
441: if (!initialized)
442: if (initblooms())
443: return -1;
444:
445: uint8_t isdata = 123;
446: err = readloc(loc, &isdata, sizeof isdata);
447: if (err < 0)
448: return -3;
449: if (isdata == 0)
450: return 0;
451: if (isdata != 1)
452: return -5;
453:
454: for (i = 0; i < bloomssize; i++)
455: if (blooms[i] == NULL)
456: break;
457: if (i == bloomssize) {
458: if (resizeblooms())
459: return -10;
460: }
461: struct bloom_internal* tmpb =
462: (struct bloom_internal*) 0xDeadBeef;
463: tmpb = (struct bloom_internal*)
464: malloc(sizeof(struct bloom_internal));
465: if (tmpb == NULL)
466: return -20;
467:
468: err = readloc(loc, &tmpb->size, sizeof tmpb->size);
469: if (err < 0) {
470: free(tmpb);
471: return -30;
472: }
473: debug("will read filter of size %lu", tmpb->size);
474: tmpb->filter = (unsigned char*) malloc(tmpb->size);
475: if (tmpb->filter == NULL) {
476: free(tmpb);
477: return -40;
478: }
479: tmpb->salt = (unsigned char*) malloc(SALTSIZE);
480: if (tmpb->salt == NULL) {
481: free(tmpb->filter);
482: free(tmpb);
483: return -45;
484: }
485:
486: err = readloc(loc, tmpb->filter, tmpb->size);
487: if (err < 0) {
488: free(tmpb->filter);
489: free(tmpb->salt);
490: free(tmpb);
491: return -50;
492: }
493:
494: err = readloc(loc, &tmpb->nbhashes, sizeof tmpb->nbhashes);
495: if (err < 0) {
496: free(tmpb->filter);
497: free(tmpb->salt);
498: free(tmpb);
499: return -60;
500: }
501:
502: err = readloc(loc, tmpb->salt, SALTSIZE);
503: if (err < 0) {
504: free(tmpb->filter);
505: free(tmpb->salt);
506: free(tmpb);
507: return -70;
508: }
509: blooms[i] = tmpb;
510: return (int) i;
511: }
512:
513: #endif /* WITH_DATALOC */