blob: ce288d9ec78d0196d9752858fca919f6872e5690 [file] [log] [blame]
/* AUTORIGHTS
Copyright (C) 2007 Princeton University
This file is part of Ferret Toolkit.
Ferret Toolkit is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/*
** Copyright (C) 2005 Thai Computational Linguistics Laboratory (TCL)
** National Institute of Information and Communications Technology (NICT)
** Written by Canasai Kruengkrai <canasaiREMOVETHIS@gmail.com>
**
** This library is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <limits.h>
#include <cass.h>
#include "cuckoo_hash.h"
#ifndef __FUNCTION__
#define __FUNCTION__ "__FUNCTION__"
#endif
#define MALLOC( type, n ) ( type * )malloc( ( n ) * sizeof( type ) )
#define MALLOC_ERROR( str1, str2 ) fprintf( stderr, "MALLOC ERROR: In function `%s': could not allocate memory for `%s'\n", str1, str2 ), exit( EXIT_FAILURE )
#define FATAL_ERROR( str1, str2 ) fprintf( stderr, "FATAL ERROR: In function `%s': %s\n", str1, str2 ), exit( EXIT_FAILURE )
#define TRUE 1
#define FALSE !TRUE
#define MAX_FUNCTION_SIZE 256
#define keycmp( s1, s2 ) strcmp( ( const char * )s1, ( const char * )s2 )
typedef struct { /* hash table cell type */
cass_vecset_id_t vid;
char *key;
int value;
} CKHash_Cell;
struct CKHash_Table_ {
int size; /* current size */
int shift; /* value used for hash function */
int table_size; /* size of hash tables */
int min_size, mean_size; /* rehash trigger sizes */
int max_chain; /* max. iterations in insert */
CKHash_Cell *T1; /* point to hash table 1 */
CKHash_Cell *T2; /* point to hash table 2 */
int function_size; /* size of hash function */
int *a1; /* hash function 1 */
int *a2; /* hash function 2 */
vtable_t *vtable;
};
static inline int ckh_get_size( CKHash_Table *D )
{
return( D->size );
}
static inline int ckh_get_table_size( CKHash_Table *D )
{
return( D->table_size );
}
static inline void ckh_init( int a[], int function_size )
{
int i;
for( i = 0; i < function_size; i++ )
a[i] = ( ( int )(rand() * rand()) << 1 ) + 1;
}
static inline void ckh_hash( unsigned long *h, int a[], int function_size, int table_size, int shift, char *key )
{
int i;
*h = 0;
for( i = 0; key[i]; i++ )
*h ^= ( unsigned int )( a[( i % function_size )]* ( unsigned char )key[i] );
*h = ( ( unsigned int )*h >> shift ) % table_size;
}
CKHash_Table *ckh_alloc_table( int table_size, vtable_t *_vt )
{
CKHash_Table *D;
if( ( D = MALLOC( CKHash_Table, 1 ) ) == NULL )
MALLOC_ERROR( __FUNCTION__, "D" );
D->vtable = _vt;
D->size = 0;
D->table_size = table_size;
D->mean_size = 5 * ( 2 * table_size ) / 12;
D->min_size = ( 2 * table_size ) / 5;
D->shift = 32 - ( int )( log( table_size ) / log( 2 ) + 0.5 );
D->max_chain = 4 + ( int )( 4 * log( table_size ) / log( 2 ) + 0.5 );
if( ( D->T1 = MALLOC( CKHash_Cell, D->table_size ) ) == NULL )
MALLOC_ERROR( __FUNCTION__, "D->T1" );
if( ( D->T2 = MALLOC( CKHash_Cell, D->table_size ) ) == NULL )
MALLOC_ERROR( __FUNCTION__, "D->T2" );
D->function_size = MAX_FUNCTION_SIZE;
if( ( D->a1 = MALLOC( int, D->function_size ) ) == NULL )
MALLOC_ERROR( __FUNCTION__, "D->a1" );
if( ( D->a2 = MALLOC( int, D->function_size ) ) == NULL )
MALLOC_ERROR( __FUNCTION__, "D->a2" );
ckh_init( D->a1, D->function_size );
ckh_init( D->a2, D->function_size );
int j;
for( j = 0; j < D->table_size; j++ )
{
D->T1[j].vid = D->T2[j].vid = CASS_ID_INV;
}
return( D );
}
int ckh_rehash_insert( CKHash_Table *D, cass_vecset_id_t vid)
{
unsigned long hkey;
int j;
CKHash_Cell x, temp;
char *vkey;
x.vid = vid;
for( j = 0; j < D->max_chain; j++ )
{
vkey = ARRAY_GET(*(D->vtable), x.vid);
ckh_hash( &hkey, D->a1, D->function_size, D->table_size, D->shift, vkey );
temp = D->T1[hkey];
D->T1[hkey] = x;
if( temp.vid == CASS_ID_INV )
return( TRUE );
x = temp;
vkey = ARRAY_GET(*(D->vtable), x.vid);
ckh_hash( &hkey, D->a2, D->function_size, D->table_size, D->shift, vkey );
temp = D->T2[hkey];
D->T2[hkey] = x;
if( temp.vid == CASS_ID_INV )
return( TRUE );
x = temp;
}
for( j = 0; j < D->table_size; j++ )
{
D->T1[j].vid = D->T2[j].vid = CASS_ID_INV;
}
ckh_init( D->a1, D->function_size );
ckh_init( D->a2, D->function_size );
return( FALSE );
}
void ckh_rehash( CKHash_Table *D, int new_size )
{
CKHash_Table *D_new;
int k;
//debug("rehash: %d\n", new_size);
D_new = ckh_alloc_table( new_size, D->vtable );
for( k = 0; k < D->table_size; k++ )
{
if( ( D->T1[k].vid != CASS_ID_INV ) && ( !ckh_rehash_insert( D_new, D->T1[k].vid ) ) )
{
//debug("restart at %d\n", k);
k = -1;
continue;
}
if( ( D->T2[k].vid != CASS_ID_INV ) && ( !ckh_rehash_insert( D_new, D->T2[k].vid ) ) ) {
//debug("restart at %d\n", k);
k = -1;
}
}
free( D->T1 );
free( D->T2 );
free( D->a1 );
free( D->a2 );
D_new->size = D->size;
*D = *D_new;
free( D_new );
}
int ckh_insert( CKHash_Table *D, cass_id_t vid )
{
unsigned long h1, h2;
int j;
CKHash_Cell x, temp;
char *vkey = ARRAY_GET(*(D->vtable), vid);
/*
** If the element is already in D, return
*/
ckh_hash( &h1, D->a1, D->function_size, D->table_size, D->shift, vkey );
if( D->T1[h1].vid != CASS_ID_INV )
if ( D->T1[h1].vid == vid )
return( FALSE );
ckh_hash( &h2, D->a2, D->function_size, D->table_size, D->shift, vkey );
if( D->T2[h2].vid != CASS_ID_INV )
if ( D->T2[h2].vid == vid )
return( FALSE );
/*
** If not, insert the new element in D.
*/
x.vid = vid;
for( j = 0; j < D->max_chain; j++ )
{
temp = D->T1[h1];
D->T1[h1] = x;
if( temp.vid == CASS_ID_INV )
{
D->size++;
if( D->table_size < D->size )
ckh_rehash( D, 2*D->table_size );
return( TRUE );
}
x = temp;
vkey = ARRAY_GET(*(D->vtable), x.vid);
ckh_hash( &h2, D->a2, D->function_size, D->table_size, D->shift, vkey );
temp = D->T2[h2];
D->T2[h2] = x;
if( temp.vid == CASS_ID_INV )
{
D->size++;
if( D->table_size < D->size )
ckh_rehash( D, 2*D->table_size );
return( TRUE );
}
x = temp;
vkey = ARRAY_GET(*(D->vtable), x.vid);
ckh_hash( &h1, D->a1, D->function_size, D->table_size, D->shift, vkey );
}
//debug("forced rehashing %s\n", vkey);
/*
** Forced rehash.
*/
if( D->size < D->mean_size )
ckh_rehash( D, D->table_size );
else
ckh_rehash( D, 2*D->table_size );
ckh_insert( D, x.vid );
return( TRUE );
}
int ckh_lookup( CKHash_Table *D, char *key )
{
unsigned long hkey;
char *vkey;
ckh_hash( &hkey, D->a1, D->function_size, D->table_size, D->shift, key );
if( D->T1[hkey].vid != CASS_ID_INV )
vkey = ARRAY_GET(*(D->vtable), D->T1[hkey].vid);
if( !keycmp( vkey, key ) )
return( TRUE );
ckh_hash( &hkey, D->a2, D->function_size, D->table_size, D->shift, key );
if( D->T2[hkey].vid != CASS_ID_INV )
vkey = ARRAY_GET(*(D->vtable), D->T2[hkey].vid);
if( !keycmp( vkey, key ) )
return( TRUE );
return( FALSE );
}
cass_id_t ckh_get( CKHash_Table *D, char *key )
{
unsigned long hkey;
char *vkey;
ckh_hash( &hkey, D->a1, D->function_size, D->table_size, D->shift, key );
if( D->T1[hkey].vid != CASS_ID_INV ) {
vkey = ARRAY_GET(*(D->vtable), D->T1[hkey].vid);
if( !keycmp( vkey, key ) )
return( D->T1[hkey].vid );
}
ckh_hash( &hkey, D->a2, D->function_size, D->table_size, D->shift, key );
if( D->T2[hkey].vid != CASS_ID_INV ) {
vkey = ARRAY_GET(*(D->vtable), D->T2[hkey].vid);
if( !keycmp( vkey, key ) )
return( D->T2[hkey].vid );
}
return( CASS_ID_INV );
}
CKHash_Table *ckh_destruct_table( CKHash_Table *D )
{
free( D->T1 );
free( D->T2 );
free( D->a1 );
free( D->a2 );
free( D );
return( NULL );
}