blob: 6fff8752bebecb82e2457718447445278b01eaad [file] [log] [blame]
/*************************************************************************/
/* */
/* Copyright (c) 1994 Stanford University */
/* */
/* All rights reserved. */
/* */
/* Permission is given to use, copy, and modify this software for any */
/* non-commercial purpose as long as this copyright notice is not */
/* removed. All other uses, including redistribution in whole or in */
/* part, are forbidden without prior written permission. */
/* */
/* This software is provided with absolutely no warranty and no */
/* support. */
/* */
/*************************************************************************/
EXTERN_ENV
#include "matrix.h"
#include <math.h>
#define AddMember(set, new) { int s, n; s = set; n = new; \
lc->link[n] = lc->link[s]; lc->link[s] = n; }
extern BMatrix LB;
extern struct GlobalMemory *Global;
extern int *node; /* global */
FactorLLDomain(which_domain, MyNum, lc)
int MyNum;
struct LocalCopies *lc;
{
int i, start, root;
int j, j_last, j_len, dest_super;
int k, k_length, update_size;
int theFirst, theLast;
int *relative, *indices;
double *domain_update;
extern int *firstchild, *child;
relative = (int *) MyMalloc(LB.n*sizeof(int), MyNum);
indices = (int *) MyMalloc(LB.n*sizeof(int), MyNum);
root = LB.domains[which_domain];
start = root;
while (firstchild[start] != firstchild[start+1])
start = child[firstchild[start]];
lc->link[root+1] = -1;
/* compute domain columns */
for (j=start; j<=root; j+=node[j]) {
j_last = j+node[j];
j_len = LB.col[j+1]-LB.col[j];
SetDestIndices(j, indices);
while (lc->link[j] != -1) {
k = lc->link[j];
lc->link[j] = lc->link[k];
k_length = LB.col[k+1]-LB.col[k];
theFirst = lc->first[k];
theLast = theFirst+1;
while (theLast < k_length && LB.row[LB.col[k]+theLast] < j_last)
theLast++;
if (theLast-theFirst == node[j] &&
k_length-theFirst == j_len) {
ModifySuperBySuper(k, theFirst, theLast, k_length,
(double *) &LB.entry[LB.col[j]].nz);
}
else if (node[k] > 1) {
ModifySuperBySuper(k, theFirst, theLast, k_length, lc->storage);
FindRelativeIndicesLeft(&LB.row[LB.col[k]+theFirst],
k_length-theFirst, 0, indices, relative);
ScatterSuperUpdate(lc->storage, theLast-theFirst, k_length-theFirst,
(double *) &LB.entry[LB.col[j]].nz,
j_len, relative);
}
else {
FindRelativeIndicesLeft(&LB.row[LB.col[k]+theFirst],
k_length-theFirst, 0, indices, relative);
ModifySuperByColumn((double *) &LB.entry[LB.col[k]+theFirst].nz,
theLast-theFirst, k_length-theFirst,
(double *) &LB.entry[LB.col[j]].nz,
j_len, relative);
}
lc->first[k] = theLast; /* move on */
if (theLast < k_length) {
dest_super = LB.row[LB.col[k]+theLast];
if (dest_super > root)
dest_super = root+1;
else if (node[dest_super] < 0)
dest_super += node[dest_super];
AddMember(dest_super, k);
}
}
CompleteSupernodeB(j);
lc->first[j] = node[j];
if (lc->first[j] < j_len) {
dest_super = LB.row[LB.col[j]+lc->first[j]];
if (dest_super > root)
dest_super = root+1;
else if (node[dest_super] < 0)
dest_super += node[dest_super];
AddMember(dest_super, j);
}
}
/* compute update matrix */
j_len = LB.col[root+1]-LB.col[root]-1;
update_size = j_len*(j_len+1)/2;
if (update_size == 0)
domain_update = NULL;
else if (LB.proc_domain_storage[which_domain])
domain_update = LB.proc_domain_storage[which_domain];
else {
domain_update = (double *) MyMalloc(update_size*sizeof(double),
MyNum);
LB.proc_domain_storage[which_domain] = domain_update;
}
for (i=0; i<update_size; i++)
domain_update[i] = 0.0;
SetDomainIndices(root, indices);
while (lc->link[root+1] != -1) {
k = lc->link[root+1];
lc->link[root+1] = lc->link[k];
k_length = LB.col[k+1] - LB.col[k];
theFirst = lc->first[k];
theLast = k_length;
if (theLast-theFirst == j_len) {
ModifySuperBySuper(k, theFirst, theLast, k_length, domain_update);
}
else if (node[k] > 1) {
ModifySuperBySuper(k, theFirst, theLast, k_length, lc->storage);
FindRelativeIndicesLeft(&LB.row[LB.col[k]+theFirst], k_length-theFirst,
0, indices, relative);
ScatterSuperUpdate(lc->storage, k_length-theFirst, k_length-theFirst,
domain_update, j_len, relative);
}
else {
FindRelativeIndicesLeft(&LB.row[LB.col[k]+theFirst],
k_length-theFirst, 0, indices, relative);
ModifySuperByColumn((double *) &LB.entry[LB.col[k]+theFirst].nz,
theLast-theFirst, k_length-theFirst,
domain_update, j_len, relative);
}
}
if (domain_update) {
DistributeUpdateFO(which_domain, MyNum, lc);
}
MyFree(relative); MyFree(indices);
}
CompleteSupernodeB(super)
{
int i, length, fits, first, last;
if (node[super] == 1) {
CompleteColumnB(super);
return;
}
length = LB.col[super+1]-LB.col[super];
fits = FitsInCache/length;
if (fits > 4)
fits &= 0xfffffffc;
else if (fits < 2)
fits = node[super];
first = super;
while (first<super+node[super]) {
last = first+fits;
if (last > super+node[super])
last = super+node[super];
i = first;
for (; i<last-1; i+=2) {
ModifyTwoBySupernodeB(first, i, i-first,
(double *) &LB.entry[LB.col[i]].nz,
(double *) &LB.entry[LB.col[i+1]].nz);
CompleteColumnB(i);
ModifyBySupernodeB(i, i+1, 1, (double *) &LB.entry[LB.col[i+1]].nz);
CompleteColumnB(i+1);
}
for (; i<last; i++) {
ModifyBySupernodeB(first, i, i-first,
(double *) &LB.entry[LB.col[i]].nz);
CompleteColumnB(i);
}
i = last;
for (; i<super+node[super]-1; i+=2)
ModifyTwoBySupernodeB(first, last, i-first,
(double *) &LB.entry[LB.col[i]].nz,
(double *) &LB.entry[LB.col[i+1]].nz);
for (; i<super+node[super]; i++)
ModifyBySupernodeB(first, last, i-first,
(double *) &LB.entry[LB.col[i]].nz);
first = last;
}
}
CompleteColumnB(j)
{
double recip, diag, *theNZ, *last;
theNZ = &LB.entry[LB.col[j]].nz;
last = &LB.entry[LB.col[j+1]].nz;
diag = *theNZ;
if (diag <= 0.0) {
printf("Negative pivot, d[%d] = %f\n", j, diag);
exit(0);
}
diag = sqrt(diag);
*theNZ++ = diag;
recip = 1.0/diag;
while (theNZ != last)
*theNZ++ *= recip;
}
/* given a src_structure non-zero structure with given length,
and assuming that global 'indices' contains structure of dest,
find relative indices */
FindRelativeIndicesLeft(src_structure, rows_in_update, offset,
indices, relative)
int *src_structure, *indices, *relative;
{
int i, *leftRow, *last;
leftRow = src_structure;
last = &src_structure[rows_in_update];
i = 0;
while (leftRow != last)
relative[i++] = indices[*leftRow++] - offset;
}
/* given a panel update 'update' with given size and relative indices,
scatter into into dest_nz */
ScatterSuperUpdate(update, cols_in_update, rows_in_update,
dest_nz, dest_len, relative)
double *update, *dest_nz;
int *relative;
{
int i, dest, *last, *leftRow;
double *theTmp, *rightNZ;
theTmp = update;
for (i=0; i<cols_in_update; i++) {
leftRow = relative+i;
last = relative + rows_in_update;
dest = leftRow[0];
rightNZ = dest_nz + dest*dest_len - dest*(dest+1)/2;
while (leftRow != last) {
rightNZ[*leftRow] += *theTmp;
*theTmp = 0.0;
theTmp++; leftRow++;
}
}
}
/* given a panel update 'update' with given size and relative indices,
scatter into into dest_nz */
ModifySuperByColumn(src_nz, cols_in_update, rows_in_update,
dest_nz, dest_len, relative)
double *src_nz, *dest_nz;
int *relative;
{
int i, dest, *last, *leftRow;
double ljk, *theTmp, *rightNZ;
for (i=0; i<cols_in_update; i++) {
leftRow = relative+i;
last = relative + rows_in_update;
dest = leftRow[0];
rightNZ = dest_nz + dest*dest_len - dest*(dest+1)/2;
theTmp = src_nz+i;
ljk = *theTmp;
while (leftRow != last) {
rightNZ[*leftRow] -= ljk*(*theTmp);
theTmp++; leftRow++;
}
}
}
SetDestIndices(super, indices)
int *indices;
{
int i, *rightRow, *lastRow;
rightRow = &LB.row[LB.col[super]];
lastRow = rightRow + (LB.col[super+1]-
LB.col[super]);
i = 0;
while (rightRow != lastRow)
indices[*rightRow++] = i++;
}
SetDomainIndices(super, indices)
int *indices;
{
int i, *rightRow, *lastRow;
rightRow = &LB.row[LB.col[super]+1];
lastRow = rightRow-1 + (LB.col[super+1]-LB.col[super]);
i = 0;
while (rightRow != lastRow)
indices[*rightRow++] = i++;
}
ModifySuperBySuper(src, theFirst, theLast, length, dest)
double *dest;
{
int i, fits;
int first, last, lastcol;
int this_length;
double *destination;
fits = FitsInCache/length;
if (fits > 4)
fits &= 0xfffffffc;
else if (fits < 2)
fits = node[src];
lastcol = src+node[src];
last = src;
while (last < lastcol) {
first = last;
last = first + fits;
if (last > lastcol)
last = lastcol;
destination = dest;
this_length = length-theFirst;
i = theFirst;
for (; i<theLast-1; i+=2) {
ModifyTwoBySupernodeB(first, last, i-(first-src),
destination, destination+this_length);
destination += this_length + this_length - 1; this_length-=2;
}
for (; i<theLast; i++) {
ModifyBySupernodeB(first, last, i-(first-src), destination);
destination += this_length; this_length--;
}
}
}
ModifyTwoBySupernodeB(super, lastcol, theFirst,
destination0, destination1)
double *destination0, *destination1;
{
int col, increment;
double ljk0_0, ljk0_1, ljk1_0, ljk1_1, ljk2_0, ljk2_1, ljk3_0, ljk3_1;
double ljk4_0, ljk4_1, ljk5_0, ljk5_1, ljk6_0, ljk6_1, ljk7_0, ljk7_1;
double d0, d1, tmp0, tmp1;
double *last, *dest0, *dest1;
double *srcNZ0, *srcNZ1, *srcNZ2, *srcNZ3;
double *srcNZ4, *srcNZ5, *srcNZ6, *srcNZ7;
col = super;
while (col < lastcol-7) {
dest0 = destination0; dest1 = destination1;
last = &LB.entry[LB.col[col+1]].nz;
increment = LB.col[col+1] - LB.col[col];
srcNZ0 = &LB.entry[LB.col[col] + (super-col) + theFirst].nz;
srcNZ1 = srcNZ0 + increment - 1;
srcNZ2 = srcNZ1 + increment - 2;
srcNZ3 = srcNZ2 + increment - 3;
srcNZ4 = srcNZ3 + increment - 4;
srcNZ5 = srcNZ4 + increment - 5;
srcNZ6 = srcNZ5 + increment - 6;
srcNZ7 = srcNZ6 + increment - 7;
ljk0_0 = *srcNZ0++; ljk0_1 = *srcNZ0++;
ljk1_0 = *srcNZ1++; ljk1_1 = *srcNZ1++;
ljk2_0 = *srcNZ2++; ljk2_1 = *srcNZ2++;
ljk3_0 = *srcNZ3++; ljk3_1 = *srcNZ3++;
ljk4_0 = *srcNZ4++; ljk4_1 = *srcNZ4++;
ljk5_0 = *srcNZ5++; ljk5_1 = *srcNZ5++;
ljk6_0 = *srcNZ6++; ljk6_1 = *srcNZ6++;
ljk7_0 = *srcNZ7++; ljk7_1 = *srcNZ7++;
*dest0++ -= ljk0_0*ljk0_0 + ljk1_0*ljk1_0 + ljk2_0*ljk2_0 + ljk3_0*ljk3_0
+ ljk4_0*ljk4_0 + ljk5_0*ljk5_0 + ljk6_0*ljk6_0 + ljk7_0*ljk7_0;
*dest1++ -= ljk0_1*ljk0_1 + ljk1_1*ljk1_1 + ljk2_1*ljk2_1 + ljk3_1*ljk3_1
+ ljk4_1*ljk4_1 + ljk5_1*ljk5_1 + ljk6_1*ljk6_1 + ljk7_1*ljk7_1;
*dest0++ -= ljk0_0*ljk0_1 + ljk1_0*ljk1_1 + ljk2_0*ljk2_1 + ljk3_0*ljk3_1
+ ljk4_0*ljk4_1 + ljk5_0*ljk5_1 + ljk6_0*ljk6_1 + ljk7_0*ljk7_1;
while (srcNZ0 != last) {
d0 = *dest0; d1 = *dest1;
tmp0 = *srcNZ0++; d0 -= ljk0_0*tmp0; d1 -= ljk0_1*tmp0;
tmp1 = *srcNZ1++; d0 -= ljk1_0*tmp1; d1 -= ljk1_1*tmp1;
tmp0 = *srcNZ2++; d0 -= ljk2_0*tmp0; d1 -= ljk2_1*tmp0;
tmp1 = *srcNZ3++; d0 -= ljk3_0*tmp1; d1 -= ljk3_1*tmp1;
tmp0 = *srcNZ4++; d0 -= ljk4_0*tmp0; d1 -= ljk4_1*tmp0;
tmp1 = *srcNZ5++; d0 -= ljk5_0*tmp1; d1 -= ljk5_1*tmp1;
tmp0 = *srcNZ6++; d0 -= ljk6_0*tmp0; d1 -= ljk6_1*tmp0;
tmp1 = *srcNZ7++; d0 -= ljk7_0*tmp1; d1 -= ljk7_1*tmp1;
*dest0++ = d0; *dest1++ = d1;
}
col += 8;
}
while (col < lastcol-3) {
dest0 = destination0; dest1 = destination1;
last = &LB.entry[LB.col[col+1]].nz;
increment = LB.col[col+1] - LB.col[col];
srcNZ0 = &LB.entry[LB.col[col] + (super-col) + theFirst].nz;
srcNZ1 = srcNZ0 + increment - 1;
srcNZ2 = srcNZ1 + increment - 2;
srcNZ3 = srcNZ2 + increment - 3;
ljk0_0 = *srcNZ0++; ljk0_1 = *srcNZ0++;
ljk1_0 = *srcNZ1++; ljk1_1 = *srcNZ1++;
ljk2_0 = *srcNZ2++; ljk2_1 = *srcNZ2++;
ljk3_0 = *srcNZ3++; ljk3_1 = *srcNZ3++;
*dest0++ -= ljk0_0*ljk0_0 + ljk1_0*ljk1_0 + ljk2_0*ljk2_0 + ljk3_0*ljk3_0;
*dest1++ -= ljk0_1*ljk0_1 + ljk1_1*ljk1_1 + ljk2_1*ljk2_1 + ljk3_1*ljk3_1;
*dest0++ -= ljk0_0*ljk0_1 + ljk1_0*ljk1_1 + ljk2_0*ljk2_1 + ljk3_0*ljk3_1;
while (srcNZ0 != last) {
d0 = *dest0; d1 = *dest1;
tmp0 = *srcNZ0++; d0 -= ljk0_0*tmp0; d1 -= ljk0_1*tmp0;
tmp1 = *srcNZ1++; d0 -= ljk1_0*tmp1; d1 -= ljk1_1*tmp1;
tmp0 = *srcNZ2++; d0 -= ljk2_0*tmp0; d1 -= ljk2_1*tmp0;
tmp1 = *srcNZ3++; d0 -= ljk3_0*tmp1; d1 -= ljk3_1*tmp1;
*dest0++ = d0; *dest1++ = d1;
}
col+=4;
}
while (col < lastcol-1) {
last = &LB.entry[LB.col[col+1]].nz;
dest0 = destination0; dest1 = destination1;
increment = LB.col[col+1] - LB.col[col];
srcNZ0 = &LB.entry[LB.col[col] + (super-col) + theFirst].nz;
srcNZ1 = srcNZ0 + increment - 1;
ljk0_0 = *srcNZ0++; ljk0_1 = *srcNZ0++;
ljk1_0 = *srcNZ1++; ljk1_1 = *srcNZ1++;
*dest0++ -= ljk0_0*ljk0_0 + ljk1_0*ljk1_0;
*dest1++ -= ljk0_1*ljk0_1 + ljk1_1*ljk1_1;
*dest0++ -= ljk0_0*ljk0_1 + ljk1_0*ljk1_1;
while (srcNZ0 != last) {
tmp0 = *srcNZ0++; tmp1 = *srcNZ1++;
*dest0++ -= ljk0_0*tmp0 + ljk1_0*tmp1;
*dest1++ -= ljk0_1*tmp0 + ljk1_1*tmp1;
}
col+=2;
}
while (col < lastcol) {
last = &LB.entry[LB.col[col+1]].nz;
dest0 = destination0; dest1 = destination1;
srcNZ0 = &LB.entry[LB.col[col] + (super-col) + theFirst].nz;
ljk0_0 = *srcNZ0++;
ljk0_1 = *srcNZ0++;
*dest0++ -= ljk0_0*ljk0_0;
*dest1++ -= ljk0_1*ljk0_1;
*dest0++ -= ljk0_0*ljk0_1;
while (srcNZ0 != last) {
tmp0 = *srcNZ0++;
*dest0++ -= ljk0_0*tmp0; *dest1++ -= ljk0_1*tmp0;
}
col++;
}
}
ModifyBySupernodeB(super, lastcol, theFirst, destination)
double *destination;
{
double t0, ljk0, ljk1, ljk2, ljk3, ljk4, ljk5, ljk6, ljk7;
int increment;
double *dest, *last;
double *theNZ0, *theNZ1, *theNZ2, *theNZ3, *theNZ4, *theNZ5, *theNZ6,*theNZ7;
int j, col;
j = LB.row[LB.col[super]+theFirst];
col = super;
while (col < lastcol - 7) {
/* eight source columns */
last = &LB.entry[LB.col[col+1]].nz;
dest = destination;
increment = LB.col[col+1] - LB.col[col];
theNZ0 = &LB.entry[LB.col[col] + (super-col) + theFirst].nz;
theNZ1 = theNZ0 + increment - 1;
theNZ2 = theNZ1 + increment - 2;
theNZ3 = theNZ2 + increment - 3;
theNZ4 = theNZ3 + increment - 4;
theNZ5 = theNZ4 + increment - 5;
theNZ6 = theNZ5 + increment - 6;
theNZ7 = theNZ6 + increment - 7;
ljk0 = *theNZ0++; ljk1 = *theNZ1++; ljk2 = *theNZ2++; ljk3 = *theNZ3++;
ljk4 = *theNZ4++; ljk5 = *theNZ5++; ljk6 = *theNZ6++; ljk7 = *theNZ7++;
*dest++ -= ljk0*ljk0 + ljk1*ljk1 + ljk2*ljk2 + ljk3*ljk3
+ ljk4*ljk4 + ljk5*ljk5 + ljk6*ljk6 + ljk7*ljk7;
while (theNZ0 != last) {
t0 = *dest;
t0 -= ljk0*(*theNZ0++);
t0 -= ljk1*(*theNZ1++);
t0 -= ljk2*(*theNZ2++);
t0 -= ljk3*(*theNZ3++);
t0 -= ljk4*(*theNZ4++);
t0 -= ljk5*(*theNZ5++);
t0 -= ljk6*(*theNZ6++);
t0 -= ljk7*(*theNZ7++);
*dest++ = t0;
}
col += 8;
}
while (col < lastcol - 3) {
/* four source columns */
last = &LB.entry[LB.col[col+1]].nz;
dest = destination;
increment = LB.col[col+1] - LB.col[col];
theNZ0 = &LB.entry[theFirst + LB.col[col] + (super-col)].nz;
theNZ1 = theNZ0 + increment - 1;
theNZ2 = theNZ1 + increment - 2;
theNZ3 = theNZ2 + increment - 3;
ljk0 = *theNZ0++; ljk1 = *theNZ1++; ljk2 = *theNZ2++; ljk3 = *theNZ3++;
*dest++ -= ljk0*ljk0 + ljk1*ljk1 + ljk2*ljk2 + ljk3*ljk3;
while (theNZ0 != last) {
t0 = *dest;
t0 -= ljk0*(*theNZ0++);
t0 -= ljk1*(*theNZ1++);
t0 -= ljk2*(*theNZ2++);
t0 -= ljk3*(*theNZ3++);
*dest++ = t0;
}
col += 4;
}
while (col < lastcol) {
/* one source column */
last = &LB.entry[LB.col[col+1]].nz;
dest = destination;
theNZ0 = &LB.entry[theFirst + LB.col[col] + (super-col)].nz;
ljk0 = *theNZ0++;
*dest++ -= ljk0*ljk0;
while (theNZ0 < last - 3) {
*dest -= ljk0*(*theNZ0);
*(dest+1) -= ljk0*(*(theNZ0+1));
*(dest+2) -= ljk0*(*(theNZ0+2));
*(dest+3) -= ljk0*(*(theNZ0+3));
dest += 4; theNZ0 += 4;
}
while (theNZ0 != last)
*dest++ -= ljk0*(*theNZ0++);
col++;
}
}