| /* |
| Author: Jianfei Zhu |
| Concordia University |
| Date: Feb. 10, 2004 |
| |
| Copyright (c) 2004, Concordia University, Montreal, Canada |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| - Redistributions of source code must retain the above copyright notice, |
| this list of conditions and the following disclaimer. |
| - Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| - Neither the name of Concordia University nor the names of its |
| contributors may be used to endorse or promote products derived from |
| this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| THE POSSIBILITY OF SUCH DAMAGE. |
| |
| */ |
| |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <math.h> |
| #include <unistd.h> |
| #include <sched.h> |
| #include <sys/syscall.h> |
| #include "buffer.h" |
| #include "common.h" |
| #include "wtime.h" |
| |
| #ifdef _OPENMP |
| #include <omp.h> |
| #else |
| static int omp_get_max_threads() {return 1;} |
| static int omp_get_thread_num() {return 0;} |
| #endif //_OPENMP |
| |
| #define fast_rightsib_table_size 16 |
| int ***currentnodeiter; |
| Fnode ***nodestack; |
| int **itemstack; |
| int** origin; |
| Fnode ***hashtable; |
| int **hot_node_count; |
| int *hot_node_depth; |
| int *hot_node_index; |
| Fnode ****fast_rightsib_table; |
| Fnode ****rightsib_backpatch_stack; |
| int **global_count_array; |
| int **global_table_array; |
| int **global_temp_order_array; |
| int **global_order_array; |
| int **rightsib_backpatch_count; |
| int **sum_item_num; |
| int **global_nodenum; |
| int lowerbound_array[3] = {0x10000, 0x100, 0}; |
| int **ntypearray; |
| int *thread_finish_status; |
| int *thread_begin_status; |
| int released_pos; |
| int* first_MC_tree; |
| unsigned int * first_MR_tree; |
| char** first_MB_tree; |
| |
| |
| #define hot_item_num 16 |
| #define hot_item_num1 6 |
| #define hot_node_num (1<<hot_item_num) |
| #define _MM_HINT_T0 1 |
| #define _MM_HINT_T1 2 |
| #define _MM_HINT_T2 3 |
| #define _MM_HINT_NTA 0 |
| |
| MapFile *mapfile; |
| MapFile **thread_mapfile; |
| int sumntype[hot_node_num]; |
| int ntypehashtable[hot_node_num]; |
| int ntypeidarray[hot_node_num]; |
| int mergedworkbase[hot_node_num]; |
| int mergedworkend[hot_node_num]; |
| int mergedworknum; |
| unsigned short *threadtranscontent; |
| int **threadntypeoffsetiter; |
| int *ntypeoffsetbase; |
| int *ntypeoffsetend; |
| int numusefulntype; |
| int **threadworkload; |
| int *threadworkloadnum; |
| |
| |
| template <class T> void transform_FPTree_into_FPArray(FP_tree *fptree, int thread, T mark) |
| { |
| Fnode* temp; |
| int i; |
| memory *local_buf = fp_buf[thread]; |
| int **local_currentnodeiter = currentnodeiter[thread]; |
| Fnode **local_nodestack = nodestack[thread]; |
| int *local_itemstack = itemstack[thread]; |
| |
| T *ItemArray; |
| fptree->NodeArrayList = (int**)local_buf->newbuf(1, fptree->itemno * (sizeof(int*) * 2 + sizeof(int) * 2)); |
| fptree->MB_nodes = (char**)(fptree->NodeArrayList + fptree->itemno); |
| fptree->MC_nodes = (int*)(fptree->MB_nodes + fptree->itemno); |
| fptree->MR_nodes = (unsigned int*)fptree->MC_nodes + fptree->itemno; |
| new_data_num[thread][0] ++; |
| ItemArray = (T*)local_buf->newbuf(1, new_data_num[thread][0] * sizeof(T)); |
| for (i = 0; i < fptree->itemno; i ++) { |
| fptree->MB_nodes[i]=local_buf->bufmark(&fptree->MR_nodes[i], &fptree->MC_nodes[i]); |
| local_currentnodeiter[i] = fptree->NodeArrayList[i] = (int*)local_buf->newbuf(1, fptree->nodenum[i] * 2 * sizeof(int)); |
| } |
| int itemiter = new_data_num[thread][0] - 1; |
| |
| fptree->Root->count = 0; |
| local_nodestack[0] = fptree->Root->leftchild; |
| int stacktop = 0; |
| int kept_itemiter = new_data_num[thread][0]; |
| bool first = true; |
| while (stacktop != -1) { |
| temp = local_nodestack[stacktop]; |
| stacktop --; |
| if (temp) { |
| if (!first && temp->leftchild == NULL) { |
| stacktop ++; |
| local_nodestack[stacktop] = temp->rightsibling; |
| int itemname = temp->itemname; |
| int itemcount = temp->count; |
| int *nodeiter = local_currentnodeiter[itemname]; |
| local_currentnodeiter[itemname] += 2; |
| nodeiter[0] = kept_itemiter; |
| nodeiter[1] = itemcount; |
| kept_itemiter --; |
| } |
| else { |
| first = false; |
| ItemArray[itemiter] = mark; |
| itemiter --; |
| for (i = 0; i <= stacktop; i ++) { |
| ItemArray[itemiter] = (T)local_itemstack[i]; |
| itemiter --; |
| } |
| for (; temp != NULL; temp = temp->leftchild) { |
| stacktop ++; |
| local_nodestack[stacktop] = temp->rightsibling; |
| int itemname = temp->itemname; |
| int itemcount = temp->count; |
| local_itemstack[stacktop] = itemname; |
| ItemArray[itemiter] = (T) itemname; |
| itemiter --; |
| int *nodeiter = local_currentnodeiter[itemname]; |
| local_currentnodeiter[itemname] += 2; |
| nodeiter[0] = (itemiter + 2); |
| nodeiter[1] = itemcount; |
| } |
| kept_itemiter = itemiter + 1; |
| itemiter ++; |
| } |
| } |
| kept_itemiter ++; |
| } |
| fptree->ItemArray = (int *) ItemArray; |
| } |
| |
| template <class T> void first_transform_FPTree_into_FPArray(FP_tree *fptree, T mark) |
| { |
| int j; |
| memory *local_buf = fp_buf[0]; |
| int sum_new_data_num; |
| sum_new_data_num = 0; |
| fptree->MB_nodes = (char**) local_buf->newbuf(1, fptree->itemno * (sizeof(int*) + sizeof(int) * 2)); |
| fptree->MC_nodes = (int*)(fptree->MB_nodes + fptree->itemno); |
| fptree->MR_nodes = (unsigned int*)fptree->MC_nodes + fptree->itemno; |
| int workingthread = omp_get_max_threads(); |
| int *content_offset_array = new int [workingthread]; |
| |
| for (j = 0; j < workingthread; j ++) { |
| new_data_num[j][0] ++; |
| sum_new_data_num += new_data_num[j][0]; |
| content_offset_array[j] = sum_new_data_num; |
| } |
| int **node_offset_array = new int *[workingthread + 1]; |
| node_offset_array[0] = new int [(workingthread + 1) * fptree->itemno]; |
| for (j = 1; j <= workingthread; j ++) |
| node_offset_array[j] = node_offset_array[j - 1] + fptree->itemno; |
| for (j = 0; j < fptree->itemno; j ++) |
| node_offset_array[0][j] = 0; |
| for (j = 1; j <= workingthread; j ++) |
| for (int i = 0; i < fptree->itemno; i ++) |
| node_offset_array[j][i] = node_offset_array[j - 1][i] + global_nodenum[j - 1][i]; |
| for (j = 0; j < fptree->itemno; j ++) { |
| fptree->MB_nodes[j]=fp_node_sub_buf->bufmark(&fptree->MR_nodes[j], &fptree->MC_nodes[j]); |
| fptree->nodenum[j] = node_offset_array[workingthread][j]; |
| fptree->NodeArrayList[j] = (int*)fp_node_sub_buf->newbuf(1, node_offset_array[workingthread][j]* 2 * sizeof(int)); |
| } |
| new_data_num[0][0] = sum_new_data_num; |
| T *ItemArray = (T *)local_buf->newbuf(1, new_data_num[0][0] * sizeof(T)); |
| #pragma omp parallel for |
| for (j = 0; j < workingthread; j ++) { |
| int kept_itemiter; |
| int itemiter = content_offset_array[j] - 1; |
| int stacktop; |
| int shift_bit; |
| int i, k; |
| Fnode **local_nodestack = nodestack[j]; |
| int *local_itemstack = itemstack[j]; |
| int **local_currentnodeiter = currentnodeiter[j]; |
| Fnode* temp; |
| for (i = 0; i < fptree->itemno; i ++) |
| local_currentnodeiter[i] = fptree->NodeArrayList[i] + 2 * node_offset_array[j][i]; |
| for (k = 0; k < threadworkloadnum[j]; k ++) { |
| int ntype = threadworkload[j][k]; |
| bool first = true; |
| stacktop = 0; |
| kept_itemiter = itemiter + 1; |
| if (hashtable[0][ntype] == fptree->Root) |
| local_nodestack[0] = fptree->Root->leftchild; |
| else { |
| for (i = 0, shift_bit = 1; i < hot_item_num; i ++, shift_bit <<=1) { |
| if ((shift_bit & ntype) != 0) { |
| local_itemstack[stacktop] = i; |
| local_nodestack[stacktop] = NULL; |
| stacktop ++; |
| } |
| } |
| stacktop --; |
| local_nodestack[stacktop] = hashtable[0][ntype]; |
| } |
| while (stacktop != -1) { |
| temp = local_nodestack[stacktop]; |
| stacktop --; |
| if (temp) { |
| if (temp->leftchild ==NULL && !first) { |
| stacktop ++; |
| local_nodestack[stacktop] = temp->rightsibling; |
| int itemname = temp->itemname; |
| int itemcount = temp->count; |
| int *nodeiter = local_currentnodeiter[itemname]; |
| local_currentnodeiter[itemname] += 2; |
| nodeiter[0] = kept_itemiter; |
| nodeiter[1] = itemcount; |
| kept_itemiter --; |
| } |
| else { |
| first = false; |
| ItemArray[itemiter] = mark; |
| itemiter --; |
| for (i = 0; i <= stacktop; i ++) { |
| ItemArray[itemiter] = (T)local_itemstack[i]; |
| itemiter --; |
| } |
| for (; temp != NULL; temp = temp->leftchild) { |
| stacktop ++; |
| local_nodestack[stacktop] = temp->rightsibling; |
| int itemname = temp->itemname; |
| int itemcount = temp->count; |
| local_itemstack[stacktop] = itemname; |
| ItemArray[itemiter] = (T) itemname; |
| itemiter --; |
| int *nodeiter = local_currentnodeiter[itemname]; |
| local_currentnodeiter[itemname] += 2; |
| nodeiter[0] = (itemiter + 2); |
| nodeiter[1] = itemcount; |
| } |
| kept_itemiter = itemiter + 1; |
| itemiter ++; |
| } |
| } |
| kept_itemiter ++; |
| } |
| } |
| } |
| fptree->ItemArray = (int *) ItemArray; |
| } |
| |
| template <class T> int FPArray_conditional_pattern_base(FP_tree *fptree, int itemname, int thread, T mark) |
| { |
| int i, j; |
| int numnode = fptree->nodenum[itemname]; |
| int* nodearray = fptree->NodeArrayList[itemname]; |
| T* Trans; |
| T * ItemArray = (T *) fptree->ItemArray; |
| int local_sum_item_num = 0; |
| int *local_supp = supp[thread]; |
| int *local_global_table_array = global_table_array[thread]; |
| int *local_global_count_array = global_count_array[thread]; |
| int *local_global_temp_order_array = global_temp_order_array[thread]; |
| |
| for (i = 0; i < numnode; i ++) { |
| #ifdef ENABLE_PREFETCHING |
| _mm_prefetch(ItemArray + nodearray[8], _MM_HINT_T0); |
| #endif |
| int begin = nodearray[0]; |
| int tempcount = nodearray[1]; |
| nodearray += 2; |
| Trans = ItemArray + begin; |
| for (j = 0; Trans[j] != mark; j ++) { |
| local_supp[Trans[j]]+= tempcount; |
| } |
| local_sum_item_num += j; |
| } |
| sum_item_num[thread][0] = local_sum_item_num; |
| j = 0; |
| for(i=0; i<itemname; i++) |
| { |
| if(local_supp[i]>=THRESHOLD) |
| { |
| local_global_table_array[j] = fptree->table[i]; |
| local_global_count_array[j] = local_supp[i]; |
| j++; |
| }else if (local_supp[i] > 0) { |
| local_global_temp_order_array[fptree->table[i]] = -1; |
| } |
| local_supp[i] = 0; |
| } |
| |
| return j; |
| |
| } |
| |
| template <class T> void FPArray_scan2_DB(FP_tree*fptree, FP_tree* old_tree, int itemname, int thread, T mark) |
| { |
| int i, has; |
| int *local_origin = origin[thread]; |
| int *local_hot_node_count = hot_node_count[thread]; |
| Fnode **local_hashtable = hashtable[thread]; |
| memory *local_fp_tree_buf = fp_tree_buf[thread]; |
| int * local_compact = compact[thread]; |
| |
| int numnode = old_tree->nodenum[itemname]; |
| int* nodearray = old_tree->NodeArrayList[itemname]; |
| T* Trans; |
| T* ItemArray = (T *)old_tree->ItemArray; |
| int ntype; |
| int max_itemno; |
| int *local_global_order_array = global_order_array[thread]; |
| int num_hot_item = fptree->num_hot_item; |
| for (i = 0; i < numnode; i ++) { |
| #ifdef ENABLE_PREFETCHING |
| _mm_prefetch(ItemArray + nodearray[6], _MM_HINT_NTA); |
| #endif |
| int begin = nodearray[0]; |
| int tempcount = nodearray[1]; |
| nodearray += 2; |
| Trans = ItemArray + begin; |
| has = 0; |
| ntype = 0; |
| max_itemno = 0; |
| for (int j = 0; Trans[j] != mark; j ++) { |
| int item = local_global_order_array[Trans[j]]; |
| if (item < num_hot_item) { |
| if (item != -1) |
| ntype |= (1 << item); |
| } else { |
| local_origin[item]=1; |
| has++; |
| if (item > max_itemno) |
| max_itemno = item; |
| } |
| } |
| local_hot_node_count[ntype] += tempcount; |
| if (local_hashtable[ntype] == NULL) { |
| local_hashtable[ntype] = (Fnode*)local_fp_tree_buf->newbuf(1, sizeof(Fnode)); |
| local_hashtable[ntype]->init(hot_node_index[ntype], 0); |
| } |
| if(has) |
| { |
| fptree->fill_count(max_itemno, thread); |
| fptree->insert(local_compact, tempcount, has, ntype, thread); |
| } |
| } |
| int local_new_data_num = new_data_num[thread][0]; |
| local_new_data_num ++; |
| int step, num_hot_node, parent; |
| Fnode *current_node, *parent_node; |
| num_hot_node = 1<<num_hot_item; |
| for (i=num_hot_node-1;i>0;i--) { |
| if (local_hashtable[i] == NULL) |
| continue; |
| step = hot_node_index[i]; |
| parent = i ^ (1 << step); |
| local_hot_node_count[parent] += local_hot_node_count[i]; |
| parent_node = local_hashtable[parent]; |
| if (parent_node == NULL) { |
| parent_node = (Fnode*)local_fp_tree_buf->newbuf(1, sizeof(Fnode)); |
| parent_node->itemname = hot_node_index[parent]; |
| parent_node->rightsibling = NULL; |
| parent_node->leftchild = NULL; |
| local_hashtable[parent] = parent_node; |
| } |
| if (parent_node->leftchild == NULL) |
| local_new_data_num ++; |
| else local_new_data_num += hot_node_depth[i]; |
| current_node = local_hashtable[i]; |
| current_node->rightsibling = parent_node->leftchild; |
| parent_node->leftchild = current_node; |
| current_node->count = local_hot_node_count[i]; |
| local_hashtable[i] = NULL; |
| local_hot_node_count[i] = 0; |
| fptree->nodenum[step] ++; |
| } |
| new_data_num[thread][0] = local_new_data_num; |
| int local_rightsib_backpatch_count = rightsib_backpatch_count[thread][0]; |
| Fnode ***local_rightsib_backpatch_stack = rightsib_backpatch_stack[thread]; |
| for (i = 0; i < local_rightsib_backpatch_count; i ++) |
| *local_rightsib_backpatch_stack[i] = NULL; |
| } |
| |
| template <class T1, class T2> void transform_FPArray(T1 *oldItemArray, T2 mark, int size) |
| { |
| T2 * newItemArray = (T2 *) oldItemArray; |
| for (int i = 0; i < size; i ++) |
| newItemArray[i] = oldItemArray[i] & mark; |
| } |
| |
| template <class T> void swap(T* k, T* j) |
| { |
| T temp; |
| temp = *j; |
| *j = *k; |
| *k = temp; |
| } |
| |
| int findpivot(const int& i, const int& j) {return (i+j)/2;} |
| |
| int partition(int* array, int* temp, int l, int r, int pivot) |
| { |
| do { |
| while (array[++l] > pivot); |
| while (r && array[--r] < pivot); |
| swap(array+l, array+r); |
| swap(temp+l, temp+r); |
| }while (l < r); |
| swap(array + l, array + r); |
| swap(temp + l, temp + r); |
| return l; |
| } |
| |
| void inssort(int* array, int* temp, int i, int j) |
| { |
| for (int k=i+1; k<=j; k++) |
| for (int m=k; (m>i) && (array[m] > array[m-1]); m--) |
| { |
| swap(array+m, array+m-1); |
| swap(temp+m, temp+m-1); |
| } |
| } |
| |
| void sort(int *array, int* temp, int i, int j) |
| { |
| if(j-i < SORTHRESH) inssort(array, temp, i, j); |
| else |
| { |
| int pivotindex = findpivot(i, j); |
| swap(array+pivotindex, array+j); |
| swap(temp+pivotindex, temp+j); |
| int k = partition(array, temp, i-1, j, array[j]); |
| swap(array+k, array+j); |
| swap(temp+k, temp+j); |
| if((k-i)>1) sort(array, temp, i, k-1); |
| if((j-k)>1) sort(array, temp, k+1, j); |
| } |
| } |
| |
| stack::stack(int length) |
| { |
| top= 0; |
| FS = new int[length]; |
| counts = NULL; |
| } |
| |
| stack::~stack() |
| { |
| delete []FS; |
| } |
| |
| void stack::insert(FP_tree* fptree) |
| { |
| for(Fnode* node=fptree->Root->leftchild; node!=NULL; node=node->leftchild) |
| { |
| FS[top]=node->itemname; |
| top++; |
| } |
| } |
| |
| void FP_tree::init(int old_itemno, int new_itemno, int thread) |
| { |
| int i; |
| Root = (Fnode*)fp_buf[thread]->newbuf(1, sizeof(Fnode)); |
| Root->init(-1, 0); |
| if(old_itemno!=-1) |
| { |
| count = (int*)fp_buf[thread]->newbuf(1,new_itemno * 2 * sizeof(int)); |
| table = count + new_itemno; |
| for (i=0; i<new_itemno; i++) |
| { |
| count[i] = 0; |
| table[i] = i; |
| } |
| new_data_num[thread][0] = 0; |
| } |
| itemno = new_itemno; |
| } |
| |
| void FP_tree::database_tiling(int workingthread) |
| { |
| int i; |
| int *thread_pos = new int [workingthread]; |
| int local_num_hot_item = num_hot_item; |
| int local_itemno = itemno; |
| |
| for (i = 0; i < workingthread; i ++) { |
| thread_pos[i] = 0; |
| int j; |
| for (j = 0; j < hot_node_num; j ++) |
| ntypearray[i][j] = 0; |
| for (j = local_num_hot_item; j < local_itemno; j ++) |
| origin[i][j] = 1; |
| } |
| #pragma omp parallel for schedule(dynamic,1) |
| for (i = 0; i < mapfile->tablesize; i ++) { |
| int k, l; |
| int *content; |
| MapFileNode *currentnode; |
| MapFileNode *newnode; |
| int size; |
| unsigned short *newcontent; |
| int currentpos; |
| int thread = omp_get_thread_num(); |
| int *local_origin = origin[thread]; |
| int *local_ntype = ntypearray[thread]; |
| int ntype; |
| int item; |
| int has; |
| int *local_hot_node_count = hot_node_count[thread]; |
| newnode = thread_mapfile[thread]->first; |
| size = newnode->size; |
| newcontent = (unsigned short *) newnode->TransactionContent; |
| currentpos = thread_pos[thread]; |
| int max_item = 0; |
| int min_item = local_itemno; |
| currentnode = mapfile->table[i]; |
| ntype = 0; |
| content = currentnode->TransactionContent; |
| has = 0; |
| for (k = currentnode->top - 1; k >= 0; k --) { |
| if (content[k] == -1) { |
| ntype &= 0xffff; |
| if (has > 0) { |
| if (size - currentpos < has + 1) { |
| newnode->top = currentpos; |
| newnode = (MapFileNode *)fp_tree_buf[thread]->newbuf(1, sizeof(MapFileNode)); |
| newnode->init(5000000, 2); |
| newnode->next = thread_mapfile[thread]->first; |
| thread_mapfile[thread]->first = newnode; |
| newcontent = (unsigned short *) (newnode->TransactionContent); |
| currentpos = 0; |
| } |
| newcontent[currentpos ++] = ntype; |
| newcontent[currentpos ++] = has; |
| local_ntype[ntype] += has + 1; |
| for (l = min_item; l <= max_item; l ++) |
| if (local_origin[l] != 1) { |
| newcontent[currentpos ++] = l; |
| local_origin[l] = 1; |
| } |
| has = 0; |
| max_item = 0; |
| min_item = local_itemno; |
| } |
| local_hot_node_count[ntype] ++; |
| ntype = 0; |
| } |
| else { |
| item = item_order[content[k]]; |
| if (item < local_num_hot_item) { |
| ntype |= (1 << item); |
| } else |
| { |
| has += local_origin[item]; |
| local_origin[item] = 0; |
| if (item > max_item) |
| max_item = item; |
| if (item < min_item) |
| min_item = item; |
| } |
| } |
| } |
| ntype &= 0xffff; |
| if (has > 0) { |
| if (size - currentpos < has + 1) { |
| newnode->top = currentpos; |
| newnode = (MapFileNode *)fp_tree_buf[i]->newbuf(1, sizeof(MapFileNode)); |
| newnode->init(5000000, 2); |
| newnode->next = thread_mapfile[i]->first; |
| thread_mapfile[i]->first = newnode; |
| newcontent = (unsigned short *) (newnode->TransactionContent); |
| currentpos = 0; |
| } |
| newcontent[currentpos ++] = ntype; |
| newcontent[currentpos ++] = has; |
| local_ntype[ntype] += has + 1; |
| for (l = min_item; l <= max_item; l ++) |
| if (local_origin[l] != 1) { |
| newcontent[currentpos ++] = l; |
| local_origin[l] = 1; |
| } |
| } |
| local_hot_node_count[ntype] ++; |
| newnode->top = currentpos; |
| currentnode->finalize(); |
| thread_pos[thread] = currentpos; |
| } |
| |
| for (i = 0; i < workingthread; i ++) { |
| thread_pos[i] = 0; |
| int j; |
| for (j = local_num_hot_item; j < local_itemno; j ++) |
| origin[i][j] = 0; |
| } |
| |
| int sumworkload = 0; |
| numusefulntype = 0; |
| int averworkload = 0; |
| ntypeoffsetbase = new int [hot_node_num]; |
| ntypeoffsetend = new int [hot_node_num]; |
| int *tempntypeoffsetbase = new int [hot_node_num]; |
| int tempworkload; |
| for (i = 0; i < hot_node_num; i ++) { |
| sumntype[i] = 0; |
| for (int j = 0; j < workingthread; j ++) |
| sumntype[i] += ntypearray[j][i]; |
| if (sumntype[i] > 0) |
| numusefulntype ++; |
| tempntypeoffsetbase[i] = ntypeoffsetbase[i] = sumworkload; |
| sumworkload += sumntype[i]; |
| ntypeoffsetend[i] = sumworkload; |
| } |
| int num_hot_node = 1<<local_num_hot_item; |
| int j, step, parent; |
| Fnode **local_hashtable = hashtable[0]; |
| int *local_hot_node_count = hot_node_count[0]; |
| Fnode *current_node; |
| threadworkload = new int * [workingthread]; |
| threadworkloadnum = new int [workingthread]; |
| for (j = 0; j < workingthread; j ++) { |
| threadworkloadnum[j] = 0; |
| threadworkload[j] = new int [hot_node_num]; |
| } |
| for (i=num_hot_node-1;i>0;i--) { |
| for (j = 1; j < workingthread; j ++) { |
| local_hot_node_count[i] += hot_node_count[j][i]; |
| hot_node_count[j][i] = 0; |
| } |
| if (local_hot_node_count[i] == 0) |
| continue; |
| current_node = (Fnode*)fp_tree_buf[0]->newbuf(1, sizeof(Fnode)); |
| current_node->itemname = hot_node_index[i]; |
| current_node->rightsibling = NULL; |
| current_node->leftchild = NULL; |
| current_node->count = local_hot_node_count[i]; |
| local_hashtable[i] = current_node; |
| step = hot_node_index[i]; |
| parent = i ^ (1 << step); |
| local_hot_node_count[parent] += local_hot_node_count[i]; |
| local_hot_node_count[i] = 0; |
| if (sumntype[i] == 0) { |
| threadworkload[0][threadworkloadnum[0]] = i; |
| threadworkloadnum[0] ++; |
| global_nodenum[0][step] ++; |
| new_data_num[0][0] += hot_node_depth[i] + 1; |
| } |
| } |
| threadtranscontent = (unsigned short *) database_buf->newbuf(1, sumworkload * sizeof(short)); |
| sort(sumntype, ntypeidarray, 0, hot_node_num - 1); |
| averworkload = sumworkload / 512; |
| tempworkload = 0; |
| mergedworkbase[0] = 0; |
| mergedworknum = 0; |
| for (i = 0; i < numusefulntype; i ++) { |
| tempworkload += sumntype[i]; |
| if (tempworkload >= averworkload) { |
| mergedworkend[mergedworknum] = i + 1; |
| mergedworknum ++; |
| mergedworkbase[mergedworknum] = i + 1; |
| tempworkload = 0; |
| } |
| } |
| mergedworkend[mergedworknum] = i; |
| mergedworknum ++; |
| |
| averworkload = 0; |
| averworkload = sumworkload / workingthread; |
| j = 0; |
| |
| for (i = 0; i < hot_node_num; i ++) |
| for (j = 0; j < workingthread; j ++) { |
| if (ntypearray[j][i] > 0) { |
| threadntypeoffsetiter[j][i] = tempntypeoffsetbase[i]; |
| tempntypeoffsetbase[i] += ntypearray[j][i]; |
| } |
| } |
| |
| #pragma omp parallel for |
| for (i = 0; i < workingthread; i ++) { |
| MapFileNode *current_mapfilenode; |
| unsigned short * content; |
| int k, size, current_pos, ntype, has; |
| unsigned short *new_content; |
| int *local_threadntypeoffsetiter = threadntypeoffsetiter[i]; |
| current_mapfilenode = thread_mapfile[i]->first; |
| while (current_mapfilenode) { |
| size = current_mapfilenode->top; |
| current_pos = 0; |
| content = (unsigned short *)current_mapfilenode->TransactionContent; |
| while (current_pos < size) { |
| ntype = content[current_pos]; |
| current_pos ++; |
| has = content[current_pos]; |
| new_content = threadtranscontent + local_threadntypeoffsetiter[ntype]; |
| local_threadntypeoffsetiter[ntype] += has + 1; |
| for (k = 0; k < has + 1; k ++) |
| new_content[k] = content[current_pos ++]; |
| } |
| current_mapfilenode->finalize(); |
| current_mapfilenode = current_mapfilenode->next; |
| } |
| } |
| delete [] tempntypeoffsetbase; |
| delete [] thread_pos; |
| } |
| |
| |
| void FP_tree::scan1_DB(Data* fdat) |
| { |
| int i,j; |
| int *counts; |
| int thread = omp_get_thread_num(); |
| |
| mapfile = (MapFile*)database_buf->newbuf(1, sizeof(MapFile)); |
| mapfile->first = NULL; |
| |
| counts = fdat->parseDataFile(mapfile); |
| |
| order = (int*)fp_buf[thread]->newbuf(1, ITEM_NO * 3 * sizeof(int)); |
| table = order + ITEM_NO; |
| count = table + ITEM_NO; |
| for (i=0; i<ITEM_NO; i++) |
| { |
| order[i]=-1; |
| count[i] = counts[i]; |
| table[i] = i; |
| } |
| |
| sort(count, table, 0, ITEM_NO-1); |
| |
| for (i =0; i<ITEM_NO&&count[i] >= THRESHOLD; i++); |
| |
| itemno = i; |
| |
| for (j=0; j<itemno; j++) |
| { |
| count[j]=counts[table[j]]; |
| order[table[j]]=j; |
| } |
| |
| order_item = new int[itemno]; |
| item_order = new int[ITEM_NO]; |
| for(i=0; i<itemno; i++) |
| { |
| order_item[i]=table[i]; |
| table[i]=i; |
| item_order[i] = order[i]; |
| order[i]=i; |
| } |
| for(;i<ITEM_NO; i++) |
| { |
| item_order[i] = order[i]; |
| order[i]=-1; |
| } |
| ITEM_NO = itemno; |
| |
| delete []counts; |
| MC_tree = 0; |
| MR_tree = 0; |
| MB_tree=fp_tree_buf[thread]->bufmark(&MR_tree, &MC_tree); |
| num_hot_item = hot_item_num; |
| if (num_hot_item > itemno) |
| num_hot_item = itemno; |
| int num_hot_node = 1 << hot_item_num; |
| int workingthread=omp_get_max_threads(); |
| |
| thread_mapfile = (MapFile **)database_buf->newbuf(1, workingthread*3*sizeof(int*)); |
| ntypearray = (int **) (thread_mapfile + workingthread); |
| threadntypeoffsetiter = (int **) (ntypearray + workingthread); |
| first_MC_tree = (int *) fp_tree_buf[0]->newbuf(1, workingthread*(2*sizeof(int) + sizeof(int*))); |
| first_MB_tree = (char **) (first_MC_tree + workingthread); |
| first_MR_tree = (unsigned int *) (first_MB_tree + workingthread); |
| for (i = 0; i < workingthread; i ++) { |
| first_MB_tree[i] = fp_tree_buf[i]->bufmark(&first_MR_tree[i], &first_MC_tree[i]); |
| thread_mapfile[i] = (MapFile *)database_buf->newbuf(1, sizeof(MapFile) + sizeof(MapFileNode) + hot_node_num * 2 * sizeof(int)); |
| thread_mapfile[i]->init(); |
| thread_mapfile[i]->first = (MapFileNode *) (thread_mapfile[i] + 1); |
| thread_mapfile[i]->first->init(2000000, 2); |
| ntypearray[i] = (int *) (thread_mapfile[i]->first + 1); |
| threadntypeoffsetiter[i] = (int *) (ntypearray[i] + hot_node_num); |
| } |
| { |
| currentnodeiter = (int***)fp_buf[thread]->newbuf(1, workingthread * 25 * sizeof(int*) + itemno * 2 * sizeof(int*) + num_hot_node * 2 * sizeof(int*)); |
| itemstack = (int**) (currentnodeiter + workingthread); |
| origin = itemstack + workingthread; |
| global_count_array = origin + workingthread; |
| global_temp_order_array = global_count_array + workingthread; |
| global_order_array = global_temp_order_array + workingthread; |
| global_table_array = global_order_array + workingthread; |
| hot_node_count = global_table_array + workingthread; |
| supp = hot_node_count + workingthread; |
| ITlen = supp + workingthread; |
| bran = ITlen + workingthread; |
| compact = bran + workingthread; |
| prefix = compact + workingthread; |
| rightsib_backpatch_count = prefix + workingthread; |
| sum_item_num = rightsib_backpatch_count + workingthread; |
| new_data_num = sum_item_num + workingthread; |
| list = (stack **) (new_data_num + workingthread); |
| hashtable = (Fnode ***) (list + workingthread); |
| nodestack = hashtable + workingthread; |
| fast_rightsib_table = (Fnode ****) (nodestack + workingthread); |
| rightsib_backpatch_stack = fast_rightsib_table + workingthread; |
| nodenum = (int *) (rightsib_backpatch_stack + workingthread); |
| NodeArrayList = (int **) (nodenum + itemno); |
| thread_finish_status = (int *) (NodeArrayList + itemno); |
| thread_begin_status = thread_finish_status + workingthread; |
| hot_node_depth = thread_begin_status + workingthread; |
| hot_node_index = hot_node_depth + num_hot_node; |
| global_nodenum = (int **) (hot_node_index + num_hot_node); |
| for (i = 0; i < workingthread; i ++) { |
| list[i] = new stack(itemno); |
| thread_finish_status[i] = itemno; |
| thread_begin_status[i] = itemno - 1; |
| } |
| released_pos = itemno; |
| for (i = 0; i < itemno; i ++) |
| nodenum[i] = 0; |
| } |
| for (i = 1; i < num_hot_node; i ++) { |
| hot_node_depth[i] = 0; |
| for (j = 1<<(num_hot_item - 1); j != 0; j >>= 1) |
| if ((j & i) != 0) { |
| hot_node_depth[i] ++; |
| } |
| for (j = num_hot_item - 1; ((1 << j) & i) == 0; j --); |
| hot_node_index[i] = j; |
| } |
| hot_node_depth[0] = 0; |
| #pragma omp parallel for |
| for (int k = 0; k < workingthread; k ++) { |
| int i; |
| #ifdef __linux__ |
| #ifdef CPU_SETSIZE |
| cpu_set_t cpu_mask; |
| CPU_ZERO(&cpu_mask); |
| CPU_SET(k,&cpu_mask); |
| sched_setaffinity(k,sizeof(cpu_set_t), &cpu_mask); |
| #else |
| unsigned long cpu_mask = (unsigned long) 1 << MyRank; |
| printf("NOT CPU_SETSIZE cpu_mask:%d\n", cpu_mask); |
| sched_setaffinity(k, sizeof(unsigned long), &cpu_mask); |
| #endif |
| #endif |
| currentnodeiter[k] = (int**)fp_buf[k]->newbuf(1, itemno * (14 + fast_rightsib_table_size) * sizeof(int *) + num_hot_node * 2 * sizeof(int *) + (fast_rightsib_table_size * itemno) * sizeof(int *) + fast_rightsib_table_size + 3 * sizeof(int*)); |
| nodestack[k] = (Fnode**)(currentnodeiter[k] + itemno); |
| itemstack[k] = (int*)(nodestack[k] + itemno); |
| global_count_array[k] = itemstack[k] + itemno; |
| global_table_array[k] = global_count_array[k] + itemno; |
| global_temp_order_array[k] = global_table_array[k] + itemno; |
| global_order_array[k] = global_temp_order_array[k] + itemno; |
| supp[k] = global_order_array[k] + itemno; |
| ITlen[k] = supp[k] + itemno; |
| bran[k] = ITlen[k] + itemno; |
| compact[k] = bran[k] + itemno; |
| prefix[k] = compact[k] + itemno; |
| hashtable[k] = (Fnode**) (prefix[k] + itemno); |
| origin[k] = (int *) (hashtable[k] + num_hot_node); |
| hot_node_count[k] = (int *) (origin[k] + itemno); |
| fast_rightsib_table[k] = (Fnode ***) (hot_node_count[k] + num_hot_node); |
| fast_rightsib_table[k][0] = (Fnode **) (fast_rightsib_table[k] + fast_rightsib_table_size); |
| for (i = 1; i < fast_rightsib_table_size; i ++) |
| fast_rightsib_table[k][i] = fast_rightsib_table[k][i - 1] + itemno; |
| global_nodenum[k] = (int *)fp_tree_buf[k]->newbuf(1, itemno*sizeof(int)); |
| for (i = 0; i < itemno; i ++) |
| global_nodenum[k][i] = 0; |
| new_data_num[k] = (int *) (fast_rightsib_table[k][fast_rightsib_table_size - 1] + itemno); |
| new_data_num[k][0] = 0; |
| rightsib_backpatch_count[k] = new_data_num[k] + 1; |
| sum_item_num[k] = rightsib_backpatch_count[k] + 1; |
| rightsib_backpatch_stack[k] = (Fnode ***) (sum_item_num[k] + 1); |
| rightsib_backpatch_count[k][0] = 0; |
| for (i = 0; i < itemno * fast_rightsib_table_size; i ++) |
| fast_rightsib_table[k][0][i] = NULL; |
| for (i = 1; i < num_hot_node; i ++) { |
| hot_node_count[k][i] = 0; |
| hashtable[k][i] = NULL; |
| } |
| hashtable[k][0] = Root; |
| for (i = 0; i < itemno; i ++) { |
| origin[k][i] = 0; |
| supp[k][i] = 0; |
| ITlen[k][i] = 0; |
| bran[k][i] = 0; |
| } |
| } |
| mapfile->transform_list_table(); |
| for (i = 0; i < hot_node_num; i ++) |
| ntypeidarray[i] = i; |
| } |
| |
| void FP_tree::insert(int* compact, int counts, int current, int ntype, int thread) |
| { |
| Fnode* child; |
| Fnode* temp, *temp2; |
| Fnode** backpatch_node = NULL; |
| int i=0, k; |
| child = hashtable[thread][ntype]; |
| int *local_bran = bran[thread]; |
| if (ntype < fast_rightsib_table_size) { |
| temp = fast_rightsib_table[thread][ntype][compact[i]]; |
| if (temp == NULL) { |
| backpatch_node = &(fast_rightsib_table[thread][ntype][compact[i]]); |
| goto OUT; |
| } |
| temp->count+=counts; |
| child=temp; |
| i++; |
| } |
| |
| while(i<current) |
| { |
| int itemname = compact[i]; |
| temp=child->leftchild; |
| if (temp == NULL) |
| break; |
| if(temp->itemname!=itemname) { |
| temp = temp->rightsibling; |
| while (1) { |
| if (temp == NULL) |
| goto OUT; |
| if(temp->itemname==itemname)break; |
| temp = temp->rightsibling; |
| } |
| } |
| temp->count+=counts; |
| child=temp; |
| i++; |
| } |
| OUT: |
| k = current - i; |
| if (k > 0) { |
| temp = (Fnode*)fp_tree_buf[thread]->newbuf(1, sizeof(Fnode) * k); |
| if (backpatch_node) { |
| *backpatch_node = temp; |
| rightsib_backpatch_stack[thread][rightsib_backpatch_count[thread][0]] = backpatch_node; |
| rightsib_backpatch_count[thread][0] ++; |
| } |
| nodenum[compact[i]] ++; |
| local_bran[i]++; |
| temp->itemname = compact[i]; |
| temp->count = counts; |
| if (child->leftchild == NULL) { |
| temp->rightsibling = child->leftchild; |
| child->leftchild=temp; |
| new_data_num[thread][0] += k; |
| } else { |
| temp->rightsibling = child->leftchild->rightsibling; |
| child->leftchild->rightsibling = temp; |
| new_data_num[thread][0] += current + hot_node_depth[ntype]; |
| } |
| temp2 = temp; |
| temp ++; |
| i++; |
| while(i<current) |
| { |
| nodenum[compact[i]] ++; |
| local_bran[i]++; |
| temp->itemname = compact[i]; |
| temp->rightsibling = NULL; |
| temp->count = counts; |
| temp2->leftchild=temp; |
| temp2 = temp; |
| temp ++; |
| i++; |
| } |
| temp --; |
| temp->leftchild = NULL; |
| } |
| } |
| |
| int FP_tree::cal_level_25(int thread) |
| { |
| int i, total_25=0, total_50=0, total_bran=0, maxlen=0; |
| int *local_bran = bran[thread]; |
| |
| for(i=0; i<this->itemno && local_bran[i]!=0; i++); |
| maxlen =i; |
| for(i=0; i<int(maxlen*0.25); i++) |
| total_25 +=local_bran[i]; |
| total_50 = total_25; |
| for(i=int(maxlen*0.25); i<this->itemno*0.5; i++) |
| total_50 +=local_bran[i]; |
| for(i=0; i<this->itemno && local_bran[i]!=0; i++) |
| { |
| total_bran+=local_bran[i]; |
| local_bran[i]=0; |
| } |
| return total_bran; |
| } |
| |
| void FP_tree::fill_count(int max_itemno, int thread) |
| { |
| int i, j=0; |
| int *local_origin = origin[thread]; |
| int *local_compact = compact[thread]; |
| for(i=num_hot_item; i<= max_itemno; i++) |
| { |
| if (local_origin[i] != 0) { |
| local_compact[j++]=i; |
| local_origin[i] = 0; |
| } |
| } |
| } |
| |
| |
| void FP_tree::scan2_DB(int workingthread) |
| { |
| double tstart, tend; |
| int j; |
| wtime(&tstart); |
| database_tiling(workingthread); |
| Fnode **local_hashtable = hashtable[0]; |
| #pragma omp parallel for schedule(dynamic,1) |
| for (j = 0; j < mergedworknum; j ++) { |
| int thread = omp_get_thread_num(); |
| int localthreadworkloadnum = threadworkloadnum[thread]; |
| int *localthreadworkload = threadworkload[thread]; |
| int has, ntype; |
| unsigned short *content = threadtranscontent; |
| int *local_nodenum = global_nodenum[thread]; |
| memory *local_fp_tree_buf = fp_tree_buf[thread]; |
| int *local_bran = bran[thread]; |
| unsigned short* compact; |
| Fnode ***local_rightsib_backpatch_stack = rightsib_backpatch_stack[thread]; |
| int local_rightsib_backpatch_count = rightsib_backpatch_count[thread][0]; |
| for (int t = mergedworkbase[j]; t < mergedworkend[j]; t ++) { |
| ntype = ntypeidarray[t]; |
| localthreadworkload[localthreadworkloadnum] = ntype; |
| localthreadworkloadnum ++; |
| int size = ntypeoffsetend[ntype]; |
| int current_pos = ntypeoffsetbase[ntype]; |
| Fnode **local_fast_rightsib_table = fast_rightsib_table[thread][ntype]; |
| Fnode *current_root = local_hashtable[ntype]; |
| int current_new_data_num = 0; |
| int current_hot_node_depth = hot_node_depth[ntype]; |
| if (ntype != 0) |
| local_nodenum[hot_node_index[ntype]] ++; |
| while (current_pos < size) { |
| has = content[current_pos]; |
| current_pos += 1; |
| compact = content + current_pos; |
| { |
| Fnode* child; |
| Fnode* temp, *temp2; |
| Fnode** backpatch_node = NULL; |
| int i=0, k; |
| child = current_root; |
| if (ntype < fast_rightsib_table_size) { |
| temp = local_fast_rightsib_table[compact[i]]; |
| if (temp == NULL) { |
| backpatch_node = &(local_fast_rightsib_table[compact[i]]); |
| } |
| else { |
| temp->count+=1; |
| child=temp; |
| i++; |
| } |
| } |
| if (temp != NULL) |
| while(i<has) |
| { |
| for(temp=child->leftchild; temp!=NULL; temp = temp->rightsibling) |
| { |
| if(temp->itemname==table[compact[i]])break; |
| } |
| if(!temp)break; |
| temp->count+=1; |
| child=temp; |
| i++; |
| } |
| k = has - i; |
| if (k > 0) { |
| temp = (Fnode*)local_fp_tree_buf->newbuf(1, sizeof(Fnode) * k); |
| if (backpatch_node) { |
| *backpatch_node = temp; |
| local_rightsib_backpatch_stack[local_rightsib_backpatch_count] = backpatch_node; |
| local_rightsib_backpatch_count ++; |
| } |
| local_nodenum[compact[i]] ++; |
| local_bran[i]++; |
| temp->itemname = compact[i]; |
| temp->count = 1; |
| if (child->leftchild == NULL) { |
| current_new_data_num += k; |
| temp->rightsibling = child->leftchild; |
| child->leftchild=temp; |
| } else { |
| temp->rightsibling = child->leftchild->rightsibling; |
| child->leftchild->rightsibling = temp; |
| current_new_data_num += has + current_hot_node_depth; |
| } |
| temp2 = temp; |
| temp ++; |
| i++; |
| while(i<has) |
| { |
| local_nodenum[compact[i]] ++; |
| local_bran[i]++; |
| temp->itemname = compact[i]; |
| temp->rightsibling = NULL; |
| temp->count = 1; |
| temp2->leftchild=temp; |
| temp2 = temp; |
| temp ++; |
| i++; |
| } |
| temp --; |
| temp->leftchild = NULL; |
| } |
| } |
| current_pos += has; |
| } |
| new_data_num[thread][0] += current_new_data_num + current_hot_node_depth + 1; |
| } |
| rightsib_backpatch_count[thread][0] = local_rightsib_backpatch_count; |
| threadworkloadnum[thread] = localthreadworkloadnum; |
| } |
| delete database_buf; |
| |
| for (int i = 0; i < workingthread; i ++) { |
| int temp = 0; |
| for (j = 0; j < itemno; j ++) |
| temp += global_nodenum[i][j]; |
| } |
| int totalnodes = cal_level_25(0); |
| |
| #pragma omp parallel for |
| for (j = 0; j < workingthread; j ++) { |
| int local_rightsib_backpatch_count = rightsib_backpatch_count[j][0]; |
| Fnode ***local_rightsib_backpatch_stack = rightsib_backpatch_stack[j]; |
| for (int i = 0; i < local_rightsib_backpatch_count; i ++) |
| *local_rightsib_backpatch_stack[i] = NULL; |
| } |
| wtime(&tend); |
| // printf("Creating the first tree from source file cost %f seconds\n", tend - tstart); |
| // printf("we have %d nodes in the initial FP tree\n", totalnodes); |
| } |
| |
| void FP_tree::scan1_DB(int thread, FP_tree* old_tree, int item) |
| { |
| int i; |
| int *local_global_count_array = global_count_array[thread]; |
| int *local_global_table_array = global_table_array[thread]; |
| int *local_global_temp_order_array = global_temp_order_array[thread]; |
| int *local_global_order_array = global_order_array[thread]; |
| int *old_table = old_tree->table; |
| for(i=0; i< itemno; i++) |
| { |
| count[i]=local_global_count_array[i]; |
| table[i]=local_global_table_array[i]; |
| } |
| |
| sort(count, table, 0, itemno-1); |
| |
| for(i=0; i<itemno; i++) |
| { |
| local_global_temp_order_array[table[i]]=i; |
| } |
| for (i = 0; i < item; i ++) |
| local_global_order_array[i] = local_global_temp_order_array[old_table[i]]; |
| |
| nodenum = (int*)fp_buf[thread]->newbuf(1, itemno * sizeof(int)); |
| for (i = 0; i < itemno; i ++) |
| nodenum[i] = 0; |
| |
| if (itemno > hot_item_num1 * 2) |
| num_hot_item = hot_item_num1; |
| else |
| num_hot_item = itemno / 2; |
| if ((1<<num_hot_item) > (sum_item_num[thread][0] / 8)) |
| num_hot_item = 0; |
| hashtable[thread][0] = Root; |
| rightsib_backpatch_count[thread][0] = 0; |
| } |
| |
| void FP_tree::powerset(int*prefix, int prefixlen, int* items, int current, int itlen, FSout* fout, int thread )const |
| { |
| if(current==itlen) |
| { |
| if(prefixlen!=0) |
| { |
| fout->printset(list[thread]->top, list[thread]->FS); |
| fout->printSet(prefixlen, prefix, count[global_temp_order_array[thread][prefix[prefixlen-1]]]); |
| } |
| }else{ |
| current++; |
| powerset(prefix, prefixlen, items, current, itlen, fout, thread); |
| current--; |
| prefix[prefixlen++]=items[current++]; |
| powerset(prefix, prefixlen, items, current, itlen, fout, thread); |
| } |
| } |
| |
| void FP_tree::generate_all(int new_item_no, int thread, FSout* fout)const |
| { |
| powerset(prefix[thread], 0, list[thread]->FS, list[thread]->top, list[thread]->top+new_item_no, fout, thread); |
| } |
| |
| bool FP_tree::Single_path(int thread)const |
| { |
| Fnode* node; |
| for(node=Root->leftchild; node!=NULL; node=node->leftchild) |
| if(node->rightsibling!=NULL)return false; |
| |
| return true; |
| } |
| |
| void FP_tree::release_node_array_after_mining(int sequence, int thread, int workingthread) |
| { |
| int current, i; |
| thread_finish_status[thread] = sequence; |
| current = 0; |
| for (i = 0; i < workingthread; i ++) { |
| if (current < thread_finish_status[i]) |
| current = thread_finish_status[i]; |
| } |
| { |
| #pragma omp critical |
| { |
| if (current < released_pos) { |
| released_pos = current; |
| fp_node_sub_buf->freebuf(MR_nodes[current], MC_nodes[current], MB_nodes[current]); |
| } |
| } |
| } |
| |
| } |
| |
| void FP_tree::release_node_array_before_mining(int sequence, int thread, int workingthread) |
| { |
| int current, i; |
| thread_begin_status[thread] = sequence; |
| current = 0; |
| for (i = 0; i < workingthread; i ++) { |
| if (current < thread_begin_status[i]) |
| current = thread_begin_status[i]; |
| } |
| current ++; |
| { |
| #pragma omp critical |
| { |
| if (current < released_pos) { |
| released_pos = current; |
| fp_node_sub_buf->freebuf(MR_nodes[current], MC_nodes[current], MB_nodes[current]); |
| } |
| } |
| } |
| |
| } |
| |
| int FP_tree::FP_growth_first(FSout* fout) |
| { |
| int sequence; |
| double tstart, tend, temp_time; |
| int function_type; |
| int workingthread = omp_get_max_threads(); |
| |
| wtime(&tstart); |
| fp_node_sub_buf = new memory(80, 131072, 2097152 * 4, 2); |
| if (itemno <= 0x100) { |
| function_type = 0; |
| first_transform_FPTree_into_FPArray(this, (unsigned char) 0xff); |
| } |
| else if (itemno < 0x10000) { |
| function_type = 1; |
| first_transform_FPTree_into_FPArray(this, (unsigned short) 0xffff); |
| } |
| else { |
| function_type = 2; |
| first_transform_FPTree_into_FPArray(this, (unsigned int) 0xffffffff); |
| } |
| for (int k = 0; k < hot_node_num; k ++) |
| hashtable[0][k] = NULL; |
| int first_data_num = new_data_num[0][0]; |
| wtime(&temp_time); |
| for (int i = 0; i < workingthread; i ++) |
| fp_tree_buf[i]->freebuf(first_MR_tree[i], first_MC_tree[i], first_MB_tree[i]); |
| fp_tree_buf[0]->freebuf(MR_tree, MC_tree, MB_tree); |
| int lowerbound = 0x7fffffff; |
| int upperbound; |
| if (lowerbound > itemno) |
| lowerbound = itemno; |
| for (int t = 0; t < 3; t ++) { |
| upperbound = lowerbound; |
| if (upperbound > itemno) |
| upperbound = itemno; |
| lowerbound = lowerbound_array[t]; |
| |
| if (upperbound > lowerbound) { |
| int new_function_type = 2; |
| if (upperbound - 1 < 0x10000) |
| new_function_type = 1; |
| if (upperbound - 1 < 0x100) |
| new_function_type = 0; |
| if (new_function_type != function_type) { |
| if (t == 1) |
| transform_FPArray(ItemArray, (unsigned short) 0xffff, first_data_num); |
| if (t == 2) |
| transform_FPArray((unsigned short *) ItemArray, (unsigned char) 0xff, first_data_num); |
| function_type = new_function_type; |
| } |
| } |
| |
| #pragma omp parallel for schedule(dynamic,1) |
| for(sequence=upperbound - 1; sequence>=lowerbound; sequence--) |
| { int current, new_item_no, listlen; |
| int MC2=0; |
| unsigned int MR2=0; |
| char* MB2; |
| int thread = omp_get_thread_num(); |
| //release_node_array_before_mining(sequence, thread, workingthread); remove due to data race |
| memory *local_fp_tree_buf = fp_tree_buf[thread]; |
| memory *local_fp_buf = fp_buf[thread]; |
| stack *local_list = list[thread]; |
| int *local_ITlen = ITlen[thread]; |
| int *local_global_table_array = global_table_array[thread]; |
| int *local_global_count_array = global_count_array[thread]; |
| current=table[sequence]; |
| local_list->FS[local_list->top++]=current; |
| listlen = local_list->top; |
| local_ITlen[local_list->top-1]++; |
| if(fout) |
| fout->printSet(local_list->top, local_list->FS, count[sequence]); |
| if(sequence !=0) { |
| if (function_type == 0) |
| new_item_no=FPArray_conditional_pattern_base(this, sequence, thread, (unsigned char) 0xff); |
| else if (function_type == 1) |
| new_item_no=FPArray_conditional_pattern_base(this, sequence, thread, (unsigned short) 0xffff); |
| else new_item_no=FPArray_conditional_pattern_base(this, sequence, thread, (unsigned int) 0xffffffff); |
| } |
| else |
| new_item_no = 0; |
| |
| if(new_item_no==0 || new_item_no == 1) |
| { |
| if(new_item_no==1) |
| { |
| local_list->FS[local_list->top++] = local_global_table_array[0]; |
| local_ITlen[local_list->top-1]++; |
| if(fout) |
| fout->printSet(local_list->top, local_list->FS, local_global_count_array[0]); |
| } |
| local_list->top=listlen-1; |
| release_node_array_after_mining(sequence, thread, workingthread); |
| continue; |
| } |
| |
| FP_tree *fptree; |
| fptree = (FP_tree*)local_fp_buf->newbuf(1, sizeof(FP_tree)); |
| fptree->init(this->itemno, new_item_no, thread); |
| |
| MB2=local_fp_tree_buf->bufmark(&MR2, &MC2); |
| fptree->MB_tree = MB2; |
| fptree->MR_tree = MR2; |
| fptree->MC_tree = MC2; |
| |
| fptree->scan1_DB(thread, this, sequence); |
| if (function_type == 0) |
| FPArray_scan2_DB(fptree, this, sequence, thread, (unsigned char)0xff); |
| else if (function_type == 1) |
| FPArray_scan2_DB(fptree, this, sequence, thread, (unsigned short)0xffff); |
| else FPArray_scan2_DB(fptree, this, sequence, thread, (unsigned int)0xffffffff); |
| local_list->top=listlen; |
| if(fptree->Single_path(thread)) |
| { |
| Fnode* node; |
| for(node=fptree->Root->leftchild; node!=NULL; node=node->leftchild) |
| local_list->FS[local_list->top++] = fptree->table[node->itemname]; |
| local_list->top = listlen; |
| int i1, i2; |
| int temp = 1; |
| for (i1 = 1, i2 = new_item_no; i1 <= new_item_no; i1 ++, i2 --) { |
| temp = (temp * i2) / i1; |
| local_ITlen[local_list->top+i1-1] += temp; |
| } |
| if (fout) |
| fptree->generate_all(new_item_no, thread, fout); |
| local_list->top--; |
| local_fp_tree_buf->freebuf(fptree->MR_tree, fptree->MC_tree, fptree->MB_tree); |
| }else{ |
| fptree->FP_growth(thread, fout); |
| local_list->top = listlen-1; |
| } |
| release_node_array_after_mining(sequence, thread, workingthread); |
| } |
| } |
| wtime(&tend); |
| // printf("the major FP_growth cost %f vs %f seconds\n", tend - tstart, temp_time - tstart); |
| return 0; |
| } |
| |
| int FP_tree::FP_growth(int thread, FSout* fout) |
| { |
| int sequence, current, new_item_no, listlen; |
| int MC2=0; |
| unsigned int MR2=0; |
| char* MB2; |
| int function_type; |
| memory *local_fp_tree_buf = fp_tree_buf[thread]; |
| memory *local_fp_buf = fp_buf[thread]; |
| stack *local_list = list[thread]; |
| int *local_ITlen = ITlen[thread]; |
| int *local_global_table_array = global_table_array[thread]; |
| int *local_global_count_array = global_count_array[thread]; |
| if (itemno <= 0x100) { |
| function_type = 0; |
| transform_FPTree_into_FPArray(this, thread, (unsigned char) 0xff); |
| } |
| else if (itemno <= 0x10000) { |
| function_type = 1; |
| transform_FPTree_into_FPArray(this, thread, (unsigned short) 0xffff); |
| } |
| else { |
| function_type = 2; |
| transform_FPTree_into_FPArray(this, thread, (unsigned int) 0xffffffff); |
| } |
| local_fp_tree_buf->freebuf(MR_tree, MC_tree, MB_tree); |
| for(sequence=itemno - 1; sequence>=0; sequence--) |
| { |
| current=table[sequence]; |
| local_list->FS[local_list->top++]=current; |
| listlen = local_list->top; |
| |
| local_ITlen[local_list->top-1]++; |
| if(fout) |
| fout->printSet(local_list->top, local_list->FS, count[sequence]); |
| if(sequence !=0) { |
| if (function_type == 0) |
| new_item_no=FPArray_conditional_pattern_base(this, sequence, thread, (unsigned char) 0xff); |
| else if (function_type == 1) |
| new_item_no=FPArray_conditional_pattern_base(this, sequence, thread, (unsigned short) 0xffff); |
| else new_item_no=FPArray_conditional_pattern_base(this, sequence, thread, (unsigned int) 0xffffffff); |
| } |
| else |
| new_item_no = 0; |
| |
| if(new_item_no==0 || new_item_no == 1) |
| { |
| if(new_item_no==1) |
| { |
| local_list->FS[local_list->top++] = local_global_table_array[0]; |
| local_ITlen[local_list->top-1]++; |
| if(fout) |
| fout->printSet(local_list->top, local_list->FS, local_global_count_array[0]); |
| } |
| local_list->top=listlen-1; |
| continue; |
| } |
| |
| FP_tree *fptree; |
| fptree = (FP_tree*)local_fp_buf->newbuf(1, sizeof(FP_tree)); |
| fptree->init(this->itemno, new_item_no, thread); |
| |
| MB2=local_fp_tree_buf->bufmark(&MR2, &MC2); |
| fptree->MB_tree = MB2; |
| fptree->MR_tree = MR2; |
| fptree->MC_tree = MC2; |
| |
| fptree->scan1_DB(thread, this, sequence); |
| if (function_type == 0) |
| FPArray_scan2_DB(fptree, this, sequence, thread, (unsigned char)0xff); |
| else if (function_type == 1) |
| FPArray_scan2_DB(fptree, this, sequence, thread, (unsigned short)0xffff); |
| else FPArray_scan2_DB(fptree, this, sequence, thread, (unsigned int)0xffffffff); |
| local_list->top=listlen; |
| if(fptree->Single_path(thread)) |
| { |
| Fnode* node; |
| for(node=fptree->Root->leftchild; node!=NULL; node=node->leftchild) |
| local_list->FS[local_list->top++] = fptree->table[node->itemname]; |
| local_list->top = listlen; |
| int i1, i2; |
| int temp = 1; |
| for (i1 = 1, i2 = new_item_no; i1 <= new_item_no; i1 ++, i2 --) { |
| temp = (temp * i2) / i1; |
| local_ITlen[local_list->top+i1-1] += temp; |
| } |
| if (fout) |
| fptree->generate_all(new_item_no, thread, fout); |
| local_list->top--; |
| local_fp_tree_buf->freebuf(fptree->MR_tree, fptree->MC_tree, fptree->MB_tree); |
| }else{ |
| fptree->FP_growth(thread, fout); |
| local_list->top = listlen-1; |
| } |
| local_fp_buf->freebuf(MR_nodes[sequence], MC_nodes[sequence], MB_nodes[sequence]); |
| } |
| return 0; |
| } |
| |