blob: 2049e2716b32ce3f9bc0e1a3bb95dfa6d69a4c28 [file] [log] [blame]
#ifndef CROSS_HPP
#define CROSS_HPP
#include "complex-type.h"
#ifdef FFTW3
#include <fftw3-mpi.h>
#else
#include <fftw_mpi.h>
#endif
#include <algorithm>
#include <vector>
#include "allocator.hpp"
#include "distribution.hpp"
#include "solver.hpp"
#include <string.h>
// pgCC doesn't yet play well with C99 constructs, so...
#ifdef __PGI__
extern "C" long int lrint(double x);
#endif
#define FFTW_ADDR(X) reinterpret_cast<fftw_complex*>(&(X)[0])
class CrossBase : public Distribution {
public:
// methods
CrossBase()
{
}
CrossBase(MPI_Comm comm, int ng)
: Distribution(comm, ng)
{
std::vector<int> n;
n.assign(3, ng);
initialize(comm, n);
}
CrossBase(MPI_Comm comm, std::vector<int> const & n)
: Distribution(comm, n)
{
initialize(comm, n);
}
virtual ~CrossBase()
{
#ifdef FFTW3
fftw_destroy_plan(m_plan_f);
fftw_destroy_plan(m_plan_b);
#else
fftwnd_mpi_destroy_plan(m_plan_f);
fftwnd_mpi_destroy_plan(m_plan_b);
#endif
}
// solve interfaces
//NOT SURE I DID THIS CORRECTLY FOR FFTW2 -- ADRIAN
void forward(complex_t const *rho1, complex_t const *rho2)
{
//forward rho1
#ifdef FFTW3
distribution_3_to_1(rho1, &m_buf1[0], &m_d); // rho1 --> buf1
fftw_execute(m_plan_f); // buf1 --> buf2
#else
distribution_3_to_1(rho1, &m_buf2[0], &m_d); // rho1 --> buf2
fftwnd_mpi(m_plan_f, 1, FFTW_ADDR(m_buf2), FFTW_ADDR(m_buf3),
FFTW_NORMAL_ORDER); // buf2 --> buf3
#endif
//copy transformed rho1 (in buf2) to a safe place (buf3)
memcpy( &m_buf3[0], &m_buf2[0], local_size()*sizeof(complex_t) );
//forward rho2
#ifdef FFTW3
distribution_3_to_1(rho2, &m_buf1[0], &m_d); // rho2 --> buf1
fftw_execute(m_plan_f); // buf1 --> buf2
#else
distribution_3_to_1(rho2, &m_buf2[0], &m_d); // rho2 --> buf2
fftwnd_mpi(m_plan_f, 1, FFTW_ADDR(m_buf2), FFTW_ADDR(m_buf3),
FFTW_NORMAL_ORDER); // buf2 --> buf3
#endif
}
void backward_xi(complex_t *phi)
{
//now, transformed rho1 in buf3, transformed rho2 in buf2
//need intermediate result in buf1
for(int i=0; i<local_size(); i++) {
m_buf1[i] = m_buf2[i] * conj(m_buf3[i]);
}
//it would be nice to set (0,0,0) mode to 0
int index = 0;
for (int local_k0 = 0; local_k0 < local_ng_1d(0); ++local_k0) {
int k0 = local_k0 + self_1d(0) * local_ng_1d(0);
for (int k1 = 0; k1 < local_ng_1d(1); ++k1) {
for (int k2 = 0; k2 < local_ng_1d(2); ++k2) {
if(k0 == 0 && k1==0 && k2==0) {
m_buf1[index] *= 0.0;
}
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
#ifdef FFTW3
fftw_execute(m_plan_b); // buf1 --> buf3
distribution_1_to_3(&m_buf3[0], phi, &m_d); // buf3 --> phi
#else
fftwnd_mpi(m_plan_b, 1,
(fftw_complex *) &m_buf1[0],
(fftw_complex *) &m_buf3[0],
FFTW_NORMAL_ORDER); // buf1 -->buf1
distribution_1_to_3(&m_buf1[0], phi, &m_d); // buf3 --> phi
#endif
}
// interfaces for std::vector
void forward(std::vector<complex_t> const & rho1,
std::vector<complex_t> const & rho2)
{
forward(&rho1[0], &rho2[0]);
}
void backward_xi(std::vector<complex_t> & phi)
{
backward_xi(&phi[0]);
}
// analysis interfaces
///
// calculate the k-space power spectrum
// P(modk) = Sum { |rho(k)|^2 : |k| = modk, k <- [0, ng / 2)^3, periodically extended }
///
void power_spectrum(std::vector<double> & power)
{
std::vector<complex_t, fftw_allocator<complex_t> > const & rho1 = m_buf3;
std::vector<complex_t, fftw_allocator<complex_t> > const & rho2 = m_buf2;
std::vector<int> ksq;
std::vector<double> weight;
int ng = m_d.n[0];
double volume = 1.0 * ng * ng * ng;
// cache periodic ksq
ksq.resize(ng);
double ksq_max = 0;
for (int k = 0; k < ng / 2; ++k) {
ksq[k] = k * k;
ksq_max = max(ksq_max, ksq[k]);
ksq[k + ng / 2] = (k - ng / 2) * (k - ng / 2);
ksq_max = max(ksq_max, ksq[k + ng / 2]);
}
long modk_max = lrint(sqrt(3 * ksq_max)); // round to nearest integer
// calculate power spectrum
power.resize(modk_max + 1);
power.assign(modk_max + 1, 0.0);
weight.resize(modk_max + 1);
weight.assign(modk_max + 1, 0.0);
int index = 0;
for (int local_k0 = 0; local_k0 < local_ng_1d(0); ++local_k0) {
int k0 = local_k0 + self_1d(0) * local_ng_1d(0);
double ksq0 = ksq[k0];
for (int k1 = 0; k1 < local_ng_1d(1); ++k1) {
double ksq1 = ksq[k1];
for (int k2 = 0; k2 < local_ng_1d(2); ++k2) {
double ksq2 = ksq[k2];
// round to nearest integer
long modk = lrint(sqrt(ksq0 + ksq1 + ksq2));
power[modk] += real(rho1[index] * conj(rho2[index]));
weight[modk] += volume;
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
// accumulate across processors
MPI_Allreduce(MPI_IN_PLACE, &power[0], power.size(), MPI_DOUBLE, MPI_SUM, cart_1d());
MPI_Allreduce(MPI_IN_PLACE, &weight[0], weight.size(), MPI_DOUBLE, MPI_SUM, cart_1d());
//make sure we don't divide by zero
for(int i = 0; i < weight.size(); ++i) {
weight[i] += 1.0 * (weight[i] < 1.0);
}
// scale power by weight
std::transform(power.begin(), power.end(), weight.begin(), power.begin(), std::divides<double>());
}
///
// General initialization
///
void initialize(MPI_Comm comm, std::vector<int> n, bool transposed_order = false)
{
int flags_f;
int flags_b;
distribution_init(comm, &n[0], &n[0], &m_d, false);
distribution_assert_commensurate(&m_d);
#ifdef FFTW3
fftw_mpi_init();
#endif
m_buf1.resize(local_size());
m_buf2.resize(local_size());
m_buf3.resize(local_size());
// create plan for forward and backward DFT's
flags_f = flags_b = FFTW_ESTIMATE;
#ifdef FFTW3
if (transposed_order) {
flags_f |= FFTW_MPI_TRANSPOSED_OUT;
flags_b |= FFTW_MPI_TRANSPOSED_IN;
}
m_plan_f = fftw_mpi_plan_dft_3d(n[0], n[1], n[2],
FFTW_ADDR(m_buf1), FFTW_ADDR(m_buf2),
comm, FFTW_FORWARD, flags_f);
m_plan_b = fftw_mpi_plan_dft_3d(n[0], n[1], n[2],
FFTW_ADDR(m_buf1), FFTW_ADDR(m_buf3),
comm, FFTW_BACKWARD, flags_b);
#else
m_plan_f = fftw3d_mpi_create_plan(comm, n[0], n[1], n[2], FFTW_FORWARD, flags_f);
m_plan_b = fftw3d_mpi_create_plan(comm, n[0], n[1], n[2], FFTW_BACKWARD, flags_b);
#endif
}
protected:
double max(double a, double b) { return a > b ? a : b; }
std::vector<complex_t, fftw_allocator<complex_t> > m_buf1;
std::vector<complex_t, fftw_allocator<complex_t> > m_buf2;
std::vector<complex_t, fftw_allocator<complex_t> > m_buf3;
#ifdef FFTW3
fftw_plan m_plan_f;
fftw_plan m_plan_b;
#else
fftwnd_mpi_plan m_plan_f;
fftwnd_mpi_plan m_plan_b;
#endif
};
#endif