blob: f4655b5791649b004709c7b3f3f9760d3e67b3df [file] [log] [blame]
/* AUTORIGHTS
Copyright (C) 2007 Princeton University
This file is part of Ferret Toolkit.
Ferret Toolkit is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <cass.h>
#include <cass_heap.h>
#include "LSH.h"
#define ptb_vec_ge(a,b) ((a)->key >= (b)->key)
ptb_vec_t *gen_score (int M)
/* use ->key1 */
/* use all 64 bits */
{
int i;
ptb_vec_t *score = type_calloc(ptb_vec_t, 2 * M);
assert(sizeof(score->set) >= 8);
for (i = 0; i < M; i++)
{
score[i].set = 1 << i;
score[i].key = (i + 1.0) * (i + 2.0) / (M + 1.0) / (M + 2.0) * 0.25;
score[i].key1 = (i + 1.0) / (M + 1.0) * 0.5;
score[2 * M - 1 - i].set = 1 << (2 * M - 1 - i);
score[2 * M - 1 - i].key = 1.0 - 2.0 * score[i].key1 + score[i].key;
score[2 * M - 1 - i].key1 = 1.0 - score[i].key1;
}
return score;
}
int gen_perturb_set (const ptb_vec_t *score, ptb_vec_t *set, int M, int T)
/* use -> max */
/* use 64 bit */
{
ptb_vec_t ptb;
ptb_vec_t ptb2;
ARRAY_TYPE(ptb_vec_t) heap;
int i, j, m;
ARRAY_INIT(heap);
ptb = score[0];
ptb.max = 0;
HEAP_ENQUEUE(heap, ptb, ptb_vec_ge);
i = 0;
if (T > 0)
for (;;)
{
if (heap.len <= 0)
{
/*
int j, k;
printf("T = %d, i = %d\n", T, i);
for (j = 0; j < i; j++)
{
for (k = 0; k < M; k++)
{
if (set[j].set & (1 << k))printf("-");
else if (set[j].set & (1 << (2 * M - 1 - k))) printf("+");
else printf("0");
}
printf("\n");
}
*/
assert(0);
}
ptb = HEAP_HEAD(heap);
for (j = 0; j < M; j++) if ((ptb.set & j) && (ptb.set & (2*M-1-j))) break;
if (j >= M)
{
set[i] = ptb;
i++;
if (i == T) break;
}
HEAP_DEQUEUE(heap, ptb_vec_ge);
m = ptb.max;
ptb.max++;
if (ptb.max >= M * 2) continue;
ptb2 = ptb;
ptb2.set &= ~score[m].set;
ptb2.key -= score[m].key;
// if ((ptb.set & score[2 * M - 1 - ptb.max].set) == 0)
/* add expand */
{
ptb.set |= score[ptb.max].set;
ptb.key += score[ptb.max].key;
HEAP_ENQUEUE(heap, ptb, ptb_vec_ge);
}
// if ((ptb2.set & score[2 * M - 1 - ptb2.max].set) == 0)
/* add shift */
{
ptb2.set |= score[ptb2.max].set;
ptb2.key += score[ptb2.max].key;
HEAP_ENQUEUE(heap, ptb2, ptb_vec_ge);
}
}
if (i < T) assert(0);
ARRAY_CLEANUP(heap);
return 0;
}
int map_perturb_vector (const ptb_vec_t set[], ptb_vec_t vector[], const ptb_vec_t mapping[], int M, int T)
/* use ->dir */
{
ptb_vec_t vec;
int i, j;
for (i = 0; i < T; i++)
{
vec.dir = vec.set = 0;
vec.key = 0;
for (j = 0; j < 2 * M; j++)
{
if (set[i].set & (1 << j))
{
assert((vec.set & mapping[j].set) == 0);
vec.set |= mapping[j].set;
vec.dir |= mapping[j].dir;
vec.key += mapping[j].key;
}
}
vector[i] = vec;
}
return 0;
}
static inline int ptb_ge (ptb_vec_t a, ptb_vec_t b)
{
return a.key >= b.key;
}
QUICKSORT_GENERATE(ptb,ptb_vec_t)