blob: 569d062258b229239e676afc5232d1a38eb4f68f [file] [log] [blame]
/*************************************************************************/
/* */
/* Copyright (c) 1994 Stanford University */
/* */
/* All rights reserved. */
/* */
/* Permission is given to use, copy, and modify this software for any */
/* non-commercial purpose as long as this copyright notice is not */
/* removed. All other uses, including redistribution in whole or in */
/* part, are forbidden without prior written permission. */
/* */
/* This software is provided with absolutely no warranty and no */
/* support. */
/* */
/*************************************************************************/
#include <stdio.h>
#define FitsInCache 2048
#define MAX_PROC 1024
#define PAGE_SIZE 4096
typedef struct {
long n, m, *col, *startrow, *row;
double *nz;
} SMatrix;
struct Pair {
long block_num;
struct Pair *next;
};
typedef struct {
long i, j, owner, remaining, nmod;
long length; /* number of full rows in block */
long parent; /* block number of parent block */
double checksum;
volatile unsigned long done;
long *structure, *relative;
double *nz;
struct Pair *pair;
} Block;
typedef union {
Block *block;
double nz;
} Entry;
typedef struct {
long n, *col, *row, n_blocks, n_entries, entries_allocated;
long *partition_size, *renumbering, *mapI, *mapJ;
long *domain, *domains, n_domains, *proc_domains;
long n_partitions, max_partition;
double **proc_domain_storage;
Entry *entry;
} BMatrix;
#define BLOCK(bl_no) (LB.entry[bl_no].block)
#define BLOCKROW(bl_no) (BLOCK(bl_no)->i)
#define BLOCKCOL(bl_no) (BLOCK(bl_no)->j)
#define OWNER(bl_no) (BLOCK(bl_no)->owner)
struct Update {
long i, j, src, remaining;
long dimi, dimj, *structi, *structj;
double *update;
struct Update *next;
};
struct Task {
long block_num, desti, destj, src;
struct Update *update;
struct Task *next;
};
struct GlobalMemory {
BARDEC(start)
LOCKDEC(waitLock)
LOCKDEC(memLock)
unsigned long runtime[MAX_PROC];
};
struct BlockList {
long theBlock, row, col, length;
long *structure;
double *nz;
struct BlockList *next;
};
struct LocalCopies {
double *blktmp;
long max_panel;
long *link;
long *first;
double *storage;
double *updatetmp;
long *relative;
struct Update *freeUpdate;
struct Task *freeTask;
unsigned long rs;
unsigned long rf;
unsigned long us;
unsigned long uf;
unsigned long ss;
unsigned long sf;
unsigned long runtime;
unsigned long runs;
};
#define DISTRIBUTED 888
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
#define LB_DOMAINS 2
#define EMBED 1
#define NO_PERM 1
#define FAN_OUT 2
/*
* amal.C
*/
long OpsFromSuper(long size, long nz);
long CountSupers(long cols, long *node);
void Amalgamate2(long join, SMatrix M, long *T, long *nz, long *node, long *domain, long target_size);
void ConsiderMerge(long join, long super, SMatrix M, long *nz, long *node, long *domain, long target_size, long traversal_order);
void JoinTwoSupers2(long *nz, long *node, long child, long parent);
void ReorderMatrix(SMatrix M, long super, long *node, long *counter, long *PERM);
void FixNodeNZAndT(SMatrix M, long *PERM, long *node, long *nz, long *T);
void InvertPerm(long n, long *PERM, long *INVP);
double PathLength(long cols, long rows, long target_panel_size);
/*
* assign.C
*/
void PDIV(long src_col, long src_nz, double *ops, double *misses, double *runtime);
void PMOD(long src_col, long dest_col, long dest_nz, double *ops, double *misses, double *runtime);
void PADD(long cols, long rows, double *misses, double *runtime);
void AssignBlocksNow(void);
void EmbedBlocks(void);
/*
* bfac.C
*/
void BFac(long diag, struct LocalCopies *lc);
void OneFac(double *A, long n1, long n2);
void BDiv(long n1, long n3, double *diag_nz, double *below_nz, struct LocalCopies *lc);
void OneDiv(double *A, double *B, long n1, long n3, long n4);
void BMod(long n1, long n2, long n3, double *top_nz, double *bend_nz, double *dest_nz, struct LocalCopies *lc);
void CopyBlock(double *B, double *dest, long n3, long is, long ks, long il, long kl);
void CopyBlockBack(double *B, double *src, long n3, long is, long ks, long il, long kl);
void OneMatmat(double *B, double *A, double *C, long n1, long n2, long n3, long n4, long n5);
void BLMod(long n1, long n2, double *left_nz, double *dest_nz, struct LocalCopies *lc);
void OneLower(double *A, double *C, long n1, long n2, long n3);
void FindBlockUpdate(long domain, long blj, long bli, double **update, long *stride);
/*
* bksolve.C
*/
double *TriBSolve(BMatrix LB, double *b, long *PERM);
double ComputeNorm(double *x, long n);
double *CreateVector(SMatrix M);
/*
* block2.C
*/
void CreateBlockedMatrix2(SMatrix M, long block_ub, long *T, long *firstchild, long *child, long *PERM, long *INVP, long *domain, long *partition);
long FindNumPartitions(long set_size, long piece_size);
void ComputeBlockParents(long *T);
void FillInStructure(SMatrix M, long *firstchild, long *child, long *PERM, long *INVP);
void FillInNZ(SMatrix M, long *PERM, long *INVP);
void FindDomStructure(long super, long *nz, long n_nz);
void FindDummyDomainStructure(long which_domain);
void CheckColLength(long col, long n_nz);
void FindBlStructure(SMatrix M, long super, long *PERM, long *INVP, long *firstchild, long *child, long *structure, long *nz);
void FindSuperStructure(SMatrix M, long super, long *PERM, long *INVP, long *firstchild, long *child, long *structure, long *nz, long *n_nz);
void FindDetailedStructure(long col, long *structure, long *nz, long n_nz);
void AllocateNZ(void);
void FillIn(SMatrix M, long col, long *PERM, long *INVP, double *scatter);
void InsSort(long *nz, long n);
long BlDepth(long col);
void SortByKey(long n, long *blocks, long *keys);
void DumpSizes(BMatrix LB, long *domain, long *sizes);
void ComputePartitionNumbering(long *numbering);
void FindMachineDimensions(long P);
long EmbeddedOwner(long block);
/*
* fo.C
*/
void PreProcessFO(long MyNum);
void PreAllocate1FO(void);
void PreAllocateFO(long MyNum, struct LocalCopies *lc);
void BNumericSolveFO(long MyNum, struct LocalCopies *lc);
void DriveParallelFO(long MyNum, struct LocalCopies *lc);
long HandleTaskFO(long MyNum, struct LocalCopies *lc);
void DiagReceived(long diag, long MyNum, struct LocalCopies *lc);
void BlockReceived(long block, long MyNum, struct LocalCopies *lc);
struct BlockList *CopyOneBlock(long block, long MyNum);
void FreeColumnListFO(long p, long col);
void DecrementRemaining(long dest_block, long MyNum, struct LocalCopies *lc);
void PerformUpdate(struct BlockList *above_bl, struct BlockList *below_bl, long MyNum, struct LocalCopies *lc);
void DistributeUpdateFO(long which_domain, long MyNum, struct LocalCopies *lc);
void HandleUpdate2FO(long which_domain, long bli, long blj, long MyNum, struct LocalCopies *lc);
void FindRelativeIndices(long *src_structure, long src_len, long *dest_structure, long *relative);
void BlockReadyFO(long block, long MyNum, struct LocalCopies *lc);
void BlockDoneFO(long block, long MyNum, struct LocalCopies *lc);
void CheckRemaining(void);
void CheckReceived(void);
void ComputeRemainingFO(void);
void InitRemainingFO(long MyNum);
void ComputeReceivedFO(void);
void InitReceivedFO(long MyNum);
void ScatterUpdateFO(long dimi, long *structi, long dimj, long *structj, long destdim, double *oldupdate, double *newupdate);
void ScatterUpdateFO2(long dimi, long *structi, long dimj, long *structj, long stride, long destdim, double *oldupdate, double *newupdate);
/*
* malloc.C
*/
void MallocInit(long P);
void InitOneFreeList(long p);
void MallocStats(void);
long FindBucket(long size);
char *MyMalloc(long size, long home);
void MigrateMem(long *start, long length, long home);
void MyFree(long *block);
void MyFreeNow(long *block);
/*
* mf.C
*/
void InitTaskQueues(long P);
long FindBlock(long i, long j);
void Send(long src_block, long dest_block, long desti, long destj, struct Update *update, long p, long MyNum, struct LocalCopies *lc);
long TaskWaiting(long MyNum);
void GetBlock(long *desti, long *destj, long *src, struct Update **update, long MyNum, struct LocalCopies *lc);
/*
* numLL.C
*/
void FactorLLDomain(long which_domain, long MyNum, struct LocalCopies *lc);
void CompleteSupernodeB(long super);
void CompleteColumnB(long j);
void FindRelativeIndicesLeft(long *src_structure, long rows_in_update, long offset, long *indices, long *relative);
void ScatterSuperUpdate(double *update, long cols_in_update, long rows_in_update, double *dest_nz, long dest_len, long *relative);
void ModifySuperByColumn(double *src_nz, long cols_in_update, long rows_in_update, double *dest_nz, long dest_len, long *relative);
void SetDestIndices(long super, long *indices);
void SetDomainIndices(long super, long *indices);
void ModifySuperBySuper(long src, long theFirst, long theLast, long length, double *dest);
void ModifyTwoBySupernodeB(long super, long lastcol, long theFirst, double *destination0, double *destination1);
void ModifyBySupernodeB(long super, long lastcol, long theFirst, double *destination);
/*
* parts.C
*/
void Partition(SMatrix M, long parts, long *T, long *assigned_ops, long *domain, long *domains, long *proc_domains);
void MarkSubtreeAsDomain(long *domain, long root);
void NumberPartition(long parts, long *assigned_ops, long distribute);
long MaxBucket(long *assigned_ops, long parts);
long MinBucket(long *assigned_ops, long parts);
struct Chunk *NewChunk(void);
struct Chunk *GetChunk(void);
/*
* seg.C
*/
void ComputeTargetBlockSize(SMatrix M, long P);
void FindMaxHeight(SMatrix L, long root, long height, long *maxm);
void NoSegments(SMatrix M);
void CreatePermutation(long n, long *PERM, long permutation_method);
/*
* solve.C
*/
void Go(void);
void PlaceDomains(long P);
void ComposePerm(long *PERM1, long *PERM2, long n);
/*
* tree.C
*/
void EliminationTreeFromA(SMatrix A, long *T, long *P, long *INVP);
void ParentToChild(long *T, long n, long *firstchild, long *child);
void ComputeNZ(SMatrix A, long *T, long *nz, long *PERM, long *INVP);
void FindSupernodes(SMatrix A, long *T, long *nz, long *node);
void ComputeWorkTree(SMatrix A, long *nz, double *work_tree);
/*
* util.C
*/
SMatrix NewMatrix(long n, long m, long nz);
void FreeMatrix(SMatrix M);
double *NewVector(long n);
double Value(long i, long j);
SMatrix ReadSparse(char *name, char *probName);
void DumpLine(FILE *fp);
void ParseIntFormat(char *buf, long *num, long *size);
void ReadVector(FILE *fp, long n, long *where, long perline, long persize);
SMatrix LowerToFull(SMatrix L);
void ISort(SMatrix M, long k);