Browse Source

implement and document a hash algorithm for bencoded dictionaries

git.mgm/mediaproxy-ng/2.2
Richard Fuchs 13 years ago
parent
commit
b0b9007fbc
2 changed files with 119 additions and 25 deletions
  1. +110
    -24
      daemon/bencode.c
  2. +9
    -1
      daemon/bencode.h

+ 110
- 24
daemon/bencode.c View File

@ -10,6 +10,8 @@
/* set to 0 for alloc debugging, e.g. through valgrind */ /* set to 0 for alloc debugging, e.g. through valgrind */
#define BENCODE_MIN_BUFFER_PIECE_LEN 512 #define BENCODE_MIN_BUFFER_PIECE_LEN 512
#define BENCODE_HASH_BUCKETS 31 /* prime numbers work best */
struct __bencode_buffer_piece { struct __bencode_buffer_piece {
char *tail; char *tail;
unsigned int left; unsigned int left;
@ -20,6 +22,9 @@ struct __bencode_free_list {
void *ptr; void *ptr;
struct __bencode_free_list *next; struct __bencode_free_list *next;
}; };
/*
 * Lookup table that the dictionary decoder places in the trailing __buf area of a
 * decoded dictionary item. It is zeroed right after allocation, so an unused slot
 * is a NULL pointer.
 */
struct __bencode_hash {
/* Each occupied slot points to the VALUE item of one key/value pair. The slot is
 * chosen from the hash of the pair's key, moving on to the next slot (with
 * wrap-around) when the preferred one is already taken. */
struct bencode_item *buckets[BENCODE_HASH_BUCKETS];
};
@ -55,6 +60,7 @@ static void __bencode_container_init(bencode_item_t *cont) {
static void bencode_dictionary_init(bencode_item_t *dict) { static void bencode_dictionary_init(bencode_item_t *dict) {
dict->type = BENCODE_DICTIONARY; dict->type = BENCODE_DICTIONARY;
dict->iov[0].iov_base = "d"; dict->iov[0].iov_base = "d";
dict->value = 0;
__bencode_container_init(dict); __bencode_container_init(dict);
} }
@ -369,38 +375,90 @@ char *bencode_collapse_dup(bencode_item_t *root, int *len) {
return ret; return ret;
} }
/*
 * Computes the hash bucket index for a byte string.
 *
 * Only the leading bytes of the string take part in the hash: as many as fill the
 * widest of unsigned long, unsigned int, unsigned short or a single byte that still
 * fits into `len`. Those bytes are read as an integer in host representation and
 * reduced modulo BENCODE_HASH_BUCKETS.
 *
 * s:   start of the string contents (need not be aligned or NUL-terminated)
 * len: number of valid bytes at `s`
 *
 * Returns a bucket index in the range [0, BENCODE_HASH_BUCKETS). Empty strings and
 * invalid (negative) lengths hash to bucket 0 without touching `s`.
 */
static unsigned int bencode_hash_str_len(const unsigned char *s, int len) {
	unsigned long ul;
	unsigned int ui;
	unsigned short us;
	size_t avail;

	/* Reject non-positive lengths up front: comparing a negative int against
	 * sizeof() would promote it to a huge unsigned value and read past the string. */
	if (len <= 0)
		return 0;
	avail = (size_t) len;

	/* The bytes are copied into properly typed locals instead of dereferencing a
	 * casted pointer: `s` points into arbitrary input and may be misaligned, and
	 * reading it through a wider integer type would violate the aliasing rules. */
	if (avail >= sizeof(ul)) {
		memcpy(&ul, s, sizeof(ul));
		return ul % BENCODE_HASH_BUCKETS;
	}
	if (avail >= sizeof(ui)) {
		memcpy(&ui, s, sizeof(ui));
		return ui % BENCODE_HASH_BUCKETS;
	}
	if (avail >= sizeof(us)) {
		memcpy(&us, s, sizeof(us));
		return us % BENCODE_HASH_BUCKETS;
	}

	/* exactly one byte available */
	return *s % BENCODE_HASH_BUCKETS;
}
/*
 * Returns the hash bucket index for the contents of a string item.
 * The item must be of type BENCODE_STRING (asserted); its contents are taken
 * from iov[1].
 */
static unsigned int bencode_hash_str(bencode_item_t *str) {
	const struct iovec *contents;

	assert(str->type == BENCODE_STRING);

	contents = &str->iov[1];
	return bencode_hash_str_len(contents->iov_base, contents->iov_len);
}
/*
 * Registers `value` in `hash` under the bucket derived from `key`.
 *
 * Starting at the key's own bucket, the first free slot (wrapping around at the
 * end of the table) receives the value. If every slot is already occupied, the
 * value is not stored and the table is left untouched.
 */
static void bencode_hash_insert(bencode_item_t *key, bencode_item_t *value, struct __bencode_hash *hash) {
	const unsigned int home = bencode_hash_str(key);
	unsigned int slot = home;

	do {
		if (!hash->buckets[slot]) {
			hash->buckets[slot] = value;
			return;
		}
		/* occupied: probe the next slot, wrapping to 0 after the last one */
		slot = (slot + 1) % BENCODE_HASH_BUCKETS;
	} while (slot != home);

	/* came full circle without finding a free slot: nothing is inserted */
}
static bencode_item_t *bencode_decode_dictionary(bencode_buffer_t *buf, const char *s, const char *end) { static bencode_item_t *bencode_decode_dictionary(bencode_buffer_t *buf, const char *s, const char *end) {
bencode_item_t *ret, *item;
bencode_item_t *ret, *key, *value;
struct __bencode_hash *hash;
if (*s != 'd') if (*s != 'd')
return NULL; return NULL;
s++; s++;
ret = __bencode_item_alloc(buf, 0);
ret = __bencode_item_alloc(buf, sizeof(*hash));
if (!ret) if (!ret)
return NULL; return NULL;
bencode_dictionary_init(ret); bencode_dictionary_init(ret);
ret->value = 1;
hash = (void *) ret->__buf;
memset(hash, 0, sizeof(*hash));
while (s < end) { while (s < end) {
item = __bencode_decode(buf, s, end);
if (!item)
key = __bencode_decode(buf, s, end);
if (!key)
return NULL; return NULL;
s += item->str_len;
if (item->type == BENCODE_END_MARKER)
s += key->str_len;
if (key->type == BENCODE_END_MARKER)
break; break;
if (item->type != BENCODE_STRING)
if (key->type != BENCODE_STRING)
return NULL; return NULL;
__bencode_container_add(ret, item);
__bencode_container_add(ret, key);
if (s >= end) if (s >= end)
return NULL; return NULL;
item = __bencode_decode(buf, s, end);
if (!item)
value = __bencode_decode(buf, s, end);
if (!value)
return NULL; return NULL;
s += item->str_len;
if (item->type == BENCODE_END_MARKER)
s += value->str_len;
if (value->type == BENCODE_END_MARKER)
return NULL; return NULL;
__bencode_container_add(ret, item);
__bencode_container_add(ret, value);
bencode_hash_insert(key, value, hash);
} }
return ret; return ret;
@ -553,26 +611,54 @@ bencode_item_t *bencode_decode(bencode_buffer_t *buf, const char *s, int len) {
} }
/* XXX inefficient, use a proper hash instead */
/*
 * Checks whether the dictionary value `val` belongs to the key given by
 * keystr/keylen. The key item is the value's ->sibling and must be a string
 * (both asserted).
 *
 * Note the inverted return convention: NULL means the key MATCHES; on a
 * mismatch the key item itself is returned so that callers can continue
 * walking the list from it.
 */
static bencode_item_t *bencode_dictionary_key_test(bencode_item_t *val, const char *keystr, int keylen) {
	bencode_item_t *candidate = val->sibling;

	assert(candidate != NULL);
	assert(candidate->type == BENCODE_STRING);

	/* same length and same bytes => this is the key we are looking for */
	if (keylen == candidate->iov[1].iov_len
			&& memcmp(keystr, candidate->iov[1].iov_base, keylen) == 0)
		return NULL;

	return candidate;
}
bencode_item_t *bencode_dictionary_get_len(bencode_item_t *dict, const char *keystr, int keylen) { bencode_item_t *bencode_dictionary_get_len(bencode_item_t *dict, const char *keystr, int keylen) {
bencode_item_t *key, *val; bencode_item_t *key, *val;
unsigned int bucket, i;
struct __bencode_hash *hash;
if (!dict) if (!dict)
return NULL; return NULL;
if (dict->type != BENCODE_DICTIONARY) if (dict->type != BENCODE_DICTIONARY)
return NULL; return NULL;
for (val = dict->child; val; val = key->sibling) {
key = val->sibling;
assert(key != NULL);
assert(key->type == BENCODE_STRING);
if (keylen != key->iov[1].iov_len)
continue;
if (memcmp(keystr, key->iov[1].iov_base, keylen))
continue;
/* try hash lookup first if possible */
if (dict->value == 1) {
hash = (void *) dict->__buf;
i = bucket = bencode_hash_str_len((const unsigned char *) keystr, keylen);
while (1) {
val = hash->buckets[i];
if (!val)
return NULL; /* would be there, but isn't */
key = bencode_dictionary_key_test(val, keystr, keylen);
if (!key)
return val;
i++;
if (i >= BENCODE_HASH_BUCKETS)
i = 0;
if (i == bucket)
break; /* fall back to regular lookup */
}
}
return val;
for (val = dict->child; val; val = key->sibling) {
key = bencode_dictionary_key_test(val, keystr, keylen);
if (!key)
return val;
} }
return NULL; return NULL;


+ 9
- 1
daemon/bencode.h View File

@ -45,7 +45,7 @@ struct bencode_item {
struct iovec iov[2]; /* when decoding, iov[1] contains the contents of a string object */ struct iovec iov[2]; /* when decoding, iov[1] contains the contents of a string object */
unsigned int iov_cnt; unsigned int iov_cnt;
unsigned int str_len; /* length of the whole ENCODED object. NOT the length of a byte string */ unsigned int str_len; /* length of the whole ENCODED object. NOT the length of a byte string */
long long int value; /* when decoding an integer, contains the value */
long long int value; /* when decoding an integer, contains the value; otherwise used internally */
bencode_item_t *parent, *child, *sibling; bencode_item_t *parent, *child, *sibling;
bencode_buffer_t *buffer; bencode_buffer_t *buffer;
char __buf[0]; char __buf[0];
@ -245,6 +245,14 @@ char *bencode_collapse_dup(bencode_item_t *root, int *len);
* pair (guaranteed to be a string and guaranteed to be present). Following another ->sibling will * pair (guaranteed to be a string and guaranteed to be present). Following another ->sibling will
* point to the VALUE of the last-but-one key/value pair, and so on. * point to the VALUE of the last-but-one key/value pair, and so on.
* *
* However, to access child objects of dictionaries, the special functions following the naming
* scheme bencode_dictionary_get_* below should be used. They perform key lookup through a simple
* hash built into the dictionary object and so perform the lookup much faster. Only dictionaries
* created through a decoding process (i.e. not ones created from bencode_dictionary()) have this
* property. The hash is efficient only up to a certain number of elements (BENCODE_HASH_BUCKETS
* in bencode.c) contained in the dictionary. If the number of child objects exceeds this number,
* key lookup will be slower than simply linearly traversing the list.
*
* The decoding function for dictionary object does not check whether keys are unique within the * The decoding function for dictionary object does not check whether keys are unique within the
* dictionary. It also does not care about lexicographical order of the keys. * dictionary. It also does not care about lexicographical order of the keys.
* *


Loading…
Cancel
Save