blob: 8d498eac43164df50420f7dfbf82d95d77f67d6b [file] [log] [blame]
/* AUTORIGHTS
Copyright (C) 2007 Princeton University
This file is part of Ferret Toolkit.
Ferret Toolkit is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#define _ISOC99_SOURCE
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdarg.h>
#include <cass.h>
void __debug (const char *file, int line, const char *func, const char *fmt, ...)
{
va_list argptr;
int cnt;
fprintf(stderr, "%s:%d: %s:", file, line, func);
va_start(argptr, fmt);
cnt = vfprintf(stderr, fmt, argptr);
va_end(argptr);
}
int mkpath (char *buf, const char *base, const char *table, const char *ext)
{
if (snprintf(buf, BUFSIZ, "%s/%s.%s", base, table, ext) >= BUFSIZ) assert(0);
return 0;
}
int fexist (const char *path)
{
struct stat buf;
if (stat(path, &buf) != 0) return 0;
return S_ISREG(buf.st_mode);
}
int dexist (const char *path)
{
struct stat buf;
if (lstat(path, &buf) != 0) return 0;
return S_ISDIR(buf.st_mode);
}
char *cass_read_pchar (CASS_FILE *in)
{
cass_size_t len;
char *p;
if (cass_read_size(&len, 1, in) != 1) return NULL;
p = malloc(len);
if (cass_read_char(p, len, in) != len)
{
free(p);
return NULL;
}
// warn("READ: %s\n", p);
return p;
}
int cass_write_pchar (const char *buf, CASS_FILE *out)
{
cass_size_t len = strlen(buf) + 1;
if (cass_write_size(&len, 1, out) != 1) return CASS_ERR_IO;
if (cass_write_char(buf, len, out) != len) return CASS_ERR_IO;
return 0;
}
int32_t param_get_int (const char *param, const char *key, int32_t def)
{
if (param == NULL) return def;
const char *p = strstr(param, key);
if (p == NULL) return def;
p += strlen(key);
return strtol(p, NULL, 0);
}
float param_get_float (const char *param, const char *key, float def)
{
if (param == NULL) return def;
const char *p = strstr(param, key);
float f;
if (p == NULL) return def;
p += strlen(key);
sscanf(p, "%f", &f);
return f;
}
int param_get_float_array (const char *param, const char *key, cass_size_t *_n, float *f /* default */)
{
cass_size_t n = 0;
char *p = strstr(param, key);
if (p == NULL) return -1;
p += strlen(key);
while (*p != '(' && *p != 0 ) p++;
if (*p == 0) return -1;
for (;;)
{
p++;
sscanf(p, "%f", &f[n]);
n++;
while (*p != ',' && *p != ')' && *p != 0) p++;
if (*p == ')' || *p == 0) break;
}
*_n = n;
return 0;
}
cass_size_t cass_vec_dim2size (cass_vec_type_t type, cass_size_t dim)
{
cass_size_t size = CASS_VEC_HEAD_SIZE;
switch (type)
{
case CASS_VEC_INT: size += sizeof(int32_t) * dim; break;
case CASS_VEC_FLOAT: size += sizeof(float) * dim; break;
case CASS_VEC_BIT: size += (dim + CHUNK_BIT -1)/CHUNK_BIT * sizeof(chunk_t); break;
default: assert(0);
}
return size;
}
int cass_result_alloc_list (cass_result_t *result, cass_size_t num_regions, cass_size_t topk)
{
memset(result, 0, sizeof *result);
if (num_regions == 0)
{
result->flags |= CASS_RESULT_LIST;
ARRAY_INIT_SIZE(result->u.list, topk);
}
else
{
int i;
result->flags |= CASS_RESULT_LISTS;
ARRAY_INIT_SIZE(result->u.lists, num_regions);
result->u.lists.len = num_regions;
for (i = 0; i < num_regions; i++)
{
ARRAY_INIT_SIZE(result->u.lists.data[i], topk);
}
}
return 0;
}
int
cass_result_alloc_bitmap(cass_result_t *result, cass_size_t num_regions,
cass_size_t size)
{
memset(result, 0, sizeof(cass_result_t));
if (num_regions == 0) {
result->flags |= CASS_RESULT_BITMAP;
bitmap_init(&(result->u.bitmap), size);
}
else {
int i;
result->flags |= CASS_RESULT_BITMAPS;
ARRAY_INIT_SIZE(result->u.bitmaps, num_regions);
result->u.bitmaps.len = num_regions;
for (i=0; i<num_regions; i++)
bitmap_init(&(result->u.bitmaps.data[i]), size);
}
return 0;
}
int
cass_result_merge_bitmaps(cass_result_t *src, cass_result_t *dst)
{
bitmap_t *dst_map = &(dst->u.bitmap);
cass_size_t dst_size = bitmap_get_size(dst_map);
assert(src->flags & CASS_RESULT_BITMAPS);
assert(dst->flags & CASS_RESULT_BITMAP);
bitmap_clear(dst_map);
ARRAY_BEGIN_FOREACH_P(src->u.bitmaps, bitmap_t *map)
{
assert(bitmap_get_size(map) == dst_size);
bitmap_union(dst_map, map);
} ARRAY_END_FOREACH_P;
return 0;
}
int cass_result_free (cass_result_t *result)
{
if (result->flags & CASS_RESULT_LIST)
{
ARRAY_CLEANUP(result->u.list);
}
else if (result->flags & CASS_RESULT_LISTS)
{
ARRAY_BEGIN_FOREACH_P(result->u.lists, cass_list_t *p)
{
ARRAY_CLEANUP(*p);
} ARRAY_END_FOREACH_P;
ARRAY_CLEANUP(result->u.lists);
}
else if (result->flags & CASS_RESULT_BITMAP)
{
bitmap_free_vec(&(result->u.bitmap));
}
else if (result->flags & CASS_RESULT_BITMAPS)
{
ARRAY_BEGIN_FOREACH_P(result->u.bitmaps, cass_map_t *map)
{
bitmap_free_vec(map);
} ARRAY_END_FOREACH_P;
ARRAY_CLEANUP(result->u.bitmaps);
}
else debug("cass_result_free: unsupported formate.\n");
return 0;
}
static inline int __cass_list_entry_ge (cass_list_entry_t a, cass_list_entry_t b)
{
return a.dist >= b.dist;
}
static inline int __cass_list_entry_rev_ge (cass_list_entry_t a, cass_list_entry_t b)
{
return b.dist >= a.dist;
}
QUICKSORT_GENERATE(__cass_list_entry, cass_list_entry_t);
cass_result_t *
cass_result_merge_lists (cass_result_t *src, cass_dataset_t *ds, int32_t topk)
{
cass_result_t *merged_result;
int merged_topk;
assert((src->flags & CASS_RESULT_BITMAPS) == 0);
assert((src->flags & CASS_RESULT_BITMAP) == 0);
assert((src->flags & CASS_RESULT_LIST) == 0);
assert(src->flags & CASS_RESULT_LISTS);
assert(topk == 0); // xxx later will modify this.
merged_topk = 0;
ARRAY_BEGIN_FOREACH(src->u.lists, cass_list_t p)
{
merged_topk += ARRAY_LEN(p);
}ARRAY_END_FOREACH;
merged_result = type_calloc(cass_result_t, 1);
cass_result_alloc_list(merged_result, 0, merged_topk);
// ARRAY_INIT_SIZE(merged_result->u.list, merged_topk);
merged_result->u.list.len = merged_topk;
TOPK_INIT(merged_result->u.list.data, id, merged_topk, CASS_ID_MAX);
ARRAY_BEGIN_FOREACH(src->u.lists, cass_list_t p)
{
ARRAY_BEGIN_FOREACH(p, cass_list_entry_t p2){
cass_vec_t *vec;
cass_list_entry_t entry;
// assert(p2.id >= 0 && p2.id <= ds->max_vec);
if (p2.id < 0 || p2.id > ds->max_vec)
{
// printf("%d\t%d\n", __array_foreach_index, p2.id);
continue;
}
vec = (void *)ds->vec + ds->vec_size * p2.id;
entry.id = vec->parent;
entry.dist = 0;
//ARRAY_APPEND(merged_result->u.list, entry);
TOPK_INSERT_MIN_UNIQ(merged_result->u.list.data, id, id, merged_topk, entry);
//TOPK_INSERT_MIN(merged_result->u.list.data, id, merged_topk, entry);
}ARRAY_END_FOREACH;
}ARRAY_END_FOREACH;
merged_result->flags |= CASS_RESULT_MALLOC;
return merged_result;
}