blob: 8edd3479c3064a7b3bba29ecc2407f2c587c4df8 [file] [log] [blame]
#ifndef SOLVER_HPP
#define SOLVER_HPP
#include "complex-type.h"
#if !defined(FFTW3) && defined(PENCIL)
#error PENCIL FFT REQUIRES FFTW3
#endif
#if defined(FFTW3) && defined(PENCIL)
#ifdef ESSL_FFTW
#include <fftw3_essl.h>
#else
#include <fftw3.h>
#endif
#elif defined(FFTW3)
#include <fftw3-mpi.h>
#else
#include <fftw_mpi.h>
#endif
#include <algorithm>
#include <vector>
#include <cassert>
#include "allocator.hpp"
#include "distribution.hpp"
#include "bigchunk.h"
#define CERRILLOS_SLAB_HACK 1
const double pi = 3.14159265358979323846;
// pgCC doesn't yet play well with C99 constructs, so...
#ifdef __PGI__
extern "C" long int lrint(double x);
#endif
#define FFTW_ADDR(X) reinterpret_cast<fftw_complex*>(&(X)[0])
///
// Abstract base class for Poisson solvers.
//
// Derived classes must provide their own implementation of
// initialize_greens_function(), and, if necessary, override the
// backward_solve() and backward_solve_gradient() methods.
///
class SolverBase : public Distribution {
public:
// methods
SolverBase()
{
}
SolverBase(MPI_Comm comm, int ng)
: Distribution(comm, ng)
{
initialize(comm);
}
SolverBase(MPI_Comm comm, std::vector<int> const & n)
: Distribution(comm, n)
{
initialize(comm);
}
virtual ~SolverBase()
{
#if defined(FFTW3) && defined(PENCIL)
fftw_destroy_plan(m_plan_f_x);
fftw_destroy_plan(m_plan_f_y);
fftw_destroy_plan(m_plan_f_z);
fftw_destroy_plan(m_plan_b_x);
fftw_destroy_plan(m_plan_b_y);
fftw_destroy_plan(m_plan_b_z);
#elif defined(FFTW3)
fftw_destroy_plan(m_plan_f);
fftw_destroy_plan(m_plan_b);
#else
fftwnd_mpi_destroy_plan(m_plan_f);
fftwnd_mpi_destroy_plan(m_plan_b);
#endif
}
// solve interfaces
void forward_solve(complex_t const *rho)
{
#if defined(FFTW3) && defined(PENCIL)
distribution_3_to_2(rho, &m_buf1[0], &m_d, 0); // rho --> buf1
fftw_execute(m_plan_f_x); // buf1 --> buf2
distribution_2_to_3(&m_buf2[0], &m_buf1[0], &m_d, 0); // buf2 --> buf1
distribution_3_to_2(&m_buf1[0], &m_buf2[0], &m_d, 1); // buf1 --> buf2
fftw_execute(m_plan_f_y); // buf2 --> buf1
distribution_2_to_3(&m_buf1[0], &m_buf2[0], &m_d, 1); // buf1 --> buf2
distribution_3_to_2(&m_buf2[0], &m_buf1[0], &m_d, 2); // buf2 --> buf1
fftw_execute(m_plan_f_z); // buf1 --> buf2
#elif defined(FFTW3)
distribution_3_to_1(rho, &m_buf1[0], &m_d); // rho --> buf1
fftw_execute(m_plan_f); // buf1 --> buf2
#else
distribution_3_to_1(rho, &m_buf2[0], &m_d); // rho --> buf2
fftwnd_mpi(m_plan_f, 1,
FFTW_ADDR(m_buf2),
FFTW_ADDR(m_buf3),
FFTW_NORMAL_ORDER); // buf2 -->buf2
#endif
}
void backward_solve(complex_t *phi)
{
kspace_solve(&m_buf2[0], &m_buf1[0]); // buf2 --> buf1
#if defined(FFTW3) && defined(PENCIL)
fftw_execute(m_plan_b_z); // buf1 --> buf3
distribution_2_to_3(&m_buf3[0], &m_buf1[0], &m_d, 2); // buf3 --> buf1
distribution_3_to_2(&m_buf1[0], &m_buf3[0], &m_d, 1); // buf1 --> buf3
fftw_execute(m_plan_b_y); // buf3 --> buf1
distribution_2_to_3(&m_buf1[0], &m_buf3[0], &m_d, 1); // buf1 --> buf3
distribution_3_to_2(&m_buf3[0], &m_buf1[0], &m_d, 0); // buf3 --> buf1
fftw_execute(m_plan_b_x); // buf1 --> buf3
distribution_2_to_3(&m_buf3[0], phi, &m_d, 0); // buf3 --> phi
#elif defined(FFTW3)
fftw_execute(m_plan_b); // buf1 --> buf3
distribution_1_to_3(&m_buf3[0], phi, &m_d); // buf3 --> phi
#else
fftwnd_mpi(m_plan_b, 1,
(fftw_complex *) &m_buf1[0],
(fftw_complex *) &m_buf3[0],
FFTW_NORMAL_ORDER); // buf1 -->buf1
distribution_1_to_3(&m_buf1[0], phi, &m_d); // buf1 --> phi
#endif
}
void backward_solve_gradient(int axis, complex_t *grad_phi)
{
kspace_solve_gradient(axis, &m_buf2[0], &m_buf1[0]); // buf2 --> buf1
#if defined(FFTW3) && defined(PENCIL)
fftw_execute(m_plan_b_z); // buf1 --> buf3
distribution_2_to_3(&m_buf3[0], &m_buf1[0], &m_d, 2); // buf3 --> buf1
distribution_3_to_2(&m_buf1[0], &m_buf3[0], &m_d, 1); // buf1 --> buf3
fftw_execute(m_plan_b_y); // buf3 --> buf1
distribution_2_to_3(&m_buf1[0], &m_buf3[0], &m_d, 1); // buf1 --> buf3
distribution_3_to_2(&m_buf3[0], &m_buf1[0], &m_d, 0); // buf3 --> buf1
fftw_execute(m_plan_b_x); // buf1 --> buf3
distribution_2_to_3(&m_buf3[0], grad_phi, &m_d, 0); // buf3 --> grad_phi
#elif defined(FFTW3)
fftw_execute(m_plan_b); // buf1 --> buf3
distribution_1_to_3(&m_buf3[0], grad_phi, &m_d); // buf3 --> grad_phi
#else
fftwnd_mpi(m_plan_b, 1,
(fftw_complex *) &m_buf1[0],
(fftw_complex *) &m_buf3[0],
FFTW_NORMAL_ORDER); // buf1 -> buf1
distribution_1_to_3(&m_buf1[0], grad_phi, &m_d); // buf1 --> grad_phi
#endif
}
void solve(const complex_t *rho, complex_t *phi)
{
forward_solve(rho);
backward_solve(phi);
}
void solve_gradient(int axis, const complex_t *rho, complex_t *phi)
{
forward_solve(rho);
backward_solve_gradient(axis, phi);
}
// interfaces for std::vector
void forward_solve(std::vector<complex_t> const & rho)
{
forward_solve(&rho[0]);
}
void backward_solve(std::vector<complex_t> & phi)
{
backward_solve(&phi[0]);
}
void backward_solve_gradient(int axis, std::vector<complex_t> & phi)
{
backward_solve_gradient(axis, &phi[0]);
}
void solve(std::vector<complex_t> const & rho, std::vector<complex_t> & phi)
{
solve(&rho[0], &phi[0]);
}
void solve_gradient(int axis, std::vector<complex_t> const & rho, std::vector<complex_t> & phi)
{
solve_gradient(axis, &rho[0], &phi[0]);
}
// analysis interfaces
///
// calculate the k-space power spectrum
// P(modk) = Sum { |rho(k)|^2 : |k| = modk, k <- [0, ng / 2)^3, periodically extended }
///
void power_spectrum(std::vector<double> & power)
{
//intermediate in m_buf2 for both FFTW2 and FFTW3
std::vector<complex_t, bigchunk_allocator<complex_t> > const & rho = m_buf2;
int ng = m_d.n[0];
double volume = 1.0 * ng * ng * ng;
double kk, tpi;
tpi = 2.0*atan(1.0)*4.0;
// cache periodic ksq
m_pk_ksq.resize(ng);
m_pk_cic.resize(ng);
double ksq_max = 0;
for (int k = 0; k < ng / 2; ++k) {
m_pk_ksq[k] = k * k;
ksq_max = max(ksq_max, m_pk_ksq[k]);
m_pk_ksq[k + ng / 2] = (k - ng / 2) * (k - ng / 2);
ksq_max = max(ksq_max, m_pk_ksq[k + ng / 2]);
kk = tpi*k/ng;
m_pk_cic[k] = pow(sin(0.5*kk)/(0.5*kk),-4.0);
kk = tpi*(k-ng/2)/ng;
m_pk_cic[k + ng/2] = pow(sin(0.5*kk)/(0.5*kk),-4.0);
}
m_pk_cic[0] = 1.0;
long modk_max = lrint(sqrt(3 * ksq_max)); // round to nearest integer
// calculate power spectrum
power.resize(modk_max + 1);
power.assign(modk_max + 1, 0.0);
m_pk_weight.resize(modk_max + 1);
m_pk_weight.assign(modk_max + 1, 0.0);
/*
!-----3-D anti-CIC filter for deconvolution
forall(ii=1:ng)kk(ii)=(ii-1)*tpi/(1.0*ng)
do ii=1,ng
if(ii.ge.ng/2+1)kk(ii)=(ii-ng-1)*tpi/(1.0*ng)
enddo
mult(1)=1.0
forall(ii=2:ng)mult(ii)=
# 1.0/(sin(kk(ii)/2.0)/(kk(ii)/2.0))**2
forall(ii=1:ng,jj=1:ng,mm=1:ng)erhotr(ii,jj,mm)=
# mult(ii)*mult(jj)*mult(mm)*erhotr(ii,jj,mm)
*/
int local_dim[3];
int self_coord[3];
#if defined(FFTW3) && defined(PENCIL)
self_coord[0]=self_2d_z(0);
self_coord[1]=self_2d_z(1);
self_coord[2]=self_2d_z(2);
local_dim[0]=local_ng_2d_z(0);
local_dim[1]=local_ng_2d_z(1);
local_dim[2]=local_ng_2d_z(2);
#else
self_coord[0]=self_1d(0);
self_coord[1]=self_1d(1);
self_coord[2]=self_1d(2);
local_dim[0]=local_ng_1d(0);
local_dim[1]=local_ng_1d(1);
local_dim[2]=local_ng_1d(2);
#endif
int index = 0;
for (int local_k0 = 0; local_k0 < local_dim[0]; ++local_k0) {
int k0 = local_k0 + self_coord[0] * local_dim[0];
double ksq0 = m_pk_ksq[k0];
for (int local_k1 = 0; local_k1 < local_dim[1]; ++local_k1) {
int k1 = local_k1 + self_coord[1] * local_dim[1];
double ksq1 = m_pk_ksq[k1];
for (int local_k2 = 0; local_k2 < local_dim[2]; ++local_k2) {
int k2 = local_k2 + self_coord[2] * local_dim[2];
double ksq2 = m_pk_ksq[k2];
long modk = lrint(sqrt(ksq0 + ksq1 + ksq2)); //round to nearest int
//power[modk] += real(rho[index] * conj(rho[index]));
power[modk] += std::real(rho[index] * conj(rho[index])) * m_pk_cic[k0] * m_pk_cic[k1] * m_pk_cic[k2];
m_pk_weight[modk] += volume;
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
// accumulate across processors
MPI_Allreduce(MPI_IN_PLACE, &power[0], power.size(),
MPI_DOUBLE, MPI_SUM, cart_1d());
MPI_Allreduce(MPI_IN_PLACE, &m_pk_weight[0], m_pk_weight.size(),
MPI_DOUBLE, MPI_SUM, cart_1d());
//make sure we don't divide by zero
for(size_t i = 0; i < m_pk_weight.size(); ++i) {
m_pk_weight[i] += 1.0 * (m_pk_weight[i] < 1.0);
}
// scale power by weight
std::transform(power.begin(), power.end(),
m_pk_weight.begin(), power.begin(),
std::divides<double>());
}
///
// General initialization
///
void initialize(MPI_Comm comm, bool transposed_order = false)
{
int flags_f;
int flags_b;
// distribution_init(comm, &n[0], &n[0], &m_d, false);
// distribution_assert_commensurate(&m_d);
#if defined(FFTW3) && !defined(PENCIL)
fftw_mpi_init();
#endif
m_greens_functions_initialized = false;
m_buf1.resize(local_size());
m_buf2.resize(local_size());
m_buf3.resize(local_size());
// create plan for forward and backward DFT's
flags_f = flags_b = FFTW_ESTIMATE;
#if defined(FFTW3) && defined(PENCIL)
m_plan_f_x = fftw_plan_many_dft(1, // rank
&(m_d.process_topology_2_x.n[0]), // const int *n,
m_d.process_topology_2_x.n[1] * m_d.process_topology_2_x.n[2], // howmany
FFTW_ADDR(m_buf1),
NULL, // const int *inembed,
1, // int istride,
m_d.process_topology_2_x.n[0], // int idist,
FFTW_ADDR(m_buf2),
NULL, // const int *onembed,
1, // int ostride,
m_d.process_topology_2_x.n[0], // int odist,
FFTW_FORWARD, // int sign,
0); // unsigned flags
m_plan_f_y = fftw_plan_many_dft(1, // rank
&(m_d.process_topology_2_y.n[1]), // const int *n,
m_d.process_topology_2_y.n[0] * m_d.process_topology_2_y.n[2], // howmany
FFTW_ADDR(m_buf2),
NULL, // const int *inembed,
1, // int istride,
m_d.process_topology_2_y.n[1], // int idist,
FFTW_ADDR(m_buf1),
NULL, // const int *onembed,
1, // int ostride,
m_d.process_topology_2_y.n[1], // int odist,
FFTW_FORWARD, // int sign,
0); // unsigned flags
m_plan_f_z = fftw_plan_many_dft(1, // rank
&(m_d.process_topology_2_z.n[2]), // const int *n,
m_d.process_topology_2_z.n[1] * m_d.process_topology_2_z.n[0], // howmany
FFTW_ADDR(m_buf1),
NULL, // const int *inembed,
1, // int istride,
m_d.process_topology_2_z.n[2], // int idist,
FFTW_ADDR(m_buf2),
NULL, // const int *onembed,
1, // int ostride,
m_d.process_topology_2_z.n[2], // int odist,
FFTW_FORWARD, // int sign,
0); // unsigned flags
m_plan_b_x = fftw_plan_many_dft(1, // rank
&(m_d.process_topology_2_x.n[0]), // const int *n,
m_d.process_topology_2_x.n[1] * m_d.process_topology_2_x.n[2], // howmany
FFTW_ADDR(m_buf1),
NULL, // const int *inembed,
1, // int istride,
m_d.process_topology_2_x.n[0], // int idist,
FFTW_ADDR(m_buf3),
NULL, // const int *onembed,
1, // int ostride,
m_d.process_topology_2_x.n[0], // int odist,
FFTW_BACKWARD, // int sign,
0); // unsigned flags
m_plan_b_y = fftw_plan_many_dft(1, // rank
&(m_d.process_topology_2_y.n[1]), // const int *n,
m_d.process_topology_2_y.n[0] * m_d.process_topology_2_y.n[2], // howmany
FFTW_ADDR(m_buf3),
NULL, // const int *inembed,
1, // int istride,
m_d.process_topology_2_y.n[1], // int idist,
FFTW_ADDR(m_buf1),
NULL, // const int *onembed,
1, // int ostride,
m_d.process_topology_2_y.n[1], // int odist,
FFTW_BACKWARD, // int sign,
0); // unsigned flags
m_plan_b_z = fftw_plan_many_dft(1, // rank
&(m_d.process_topology_2_z.n[2]), // const int *n,
m_d.process_topology_2_z.n[1] * m_d.process_topology_2_z.n[0], // howmany
FFTW_ADDR(m_buf1),
NULL, // const int *inembed,
1, // int istride,
m_d.process_topology_2_z.n[2], // int idist,
FFTW_ADDR(m_buf3),
NULL, // const int *onembed,
1, // int ostride,
m_d.process_topology_2_z.n[2], // int odist,
FFTW_BACKWARD, // int sign,
0); // unsigned flags
#elif defined(FFTW3)
if (transposed_order) {
flags_f |= FFTW_MPI_TRANSPOSED_OUT;
flags_b |= FFTW_MPI_TRANSPOSED_IN;
}
#if CERRILLOS_SLAB_HACK == 1
//attempt to make mpi fftw 3.3 work on more of cerrillos
//because of green's function application,
//does not require changes to simulation code
flags_f |= FFTW_DESTROY_INPUT;
flags_b |= FFTW_DESTROY_INPUT;
#endif
m_plan_f = fftw_mpi_plan_dft_3d(m_d.n[0], m_d.n[1], m_d.n[2],
FFTW_ADDR(m_buf1), FFTW_ADDR(m_buf2),
comm, FFTW_FORWARD, flags_f);
m_plan_b = fftw_mpi_plan_dft_3d(m_d.n[0], m_d.n[1], m_d.n[2],
FFTW_ADDR(m_buf1), FFTW_ADDR(m_buf3),
comm, FFTW_BACKWARD, flags_b);
#else
m_plan_f = fftw3d_mpi_create_plan(comm, m_d.n[0], m_d.n[1], m_d.n[2], FFTW_FORWARD, flags_f);
m_plan_b = fftw3d_mpi_create_plan(comm, m_d.n[0], m_d.n[1], m_d.n[2], FFTW_BACKWARD, flags_b);
#endif
}
///
// Solve for the potential by applying the Green's function to the
// density (in k-space)
// rho density (input)
// phi potential (output)
///
void kspace_solve(const complex_t *rho, complex_t *phi)
{
int k[3];
int index;
initialize_greens_function();
int local_dim[3];
int self_coord[3];
#if defined(FFTW3) && defined(PENCIL)
self_coord[0]=self_2d_z(0);
self_coord[1]=self_2d_z(1);
self_coord[2]=self_2d_z(2);
local_dim[0]=local_ng_2d_z(0);
local_dim[1]=local_ng_2d_z(1);
local_dim[2]=local_ng_2d_z(2);
#else
self_coord[0]=self_1d(0);
self_coord[1]=self_1d(1);
self_coord[2]=self_1d(2);
local_dim[0]=local_ng_1d(0);
local_dim[1]=local_ng_1d(1);
local_dim[2]=local_ng_1d(2);
#endif
index = 0;
for (int local_k0 = 0; local_k0 < local_dim[0]; ++local_k0) {
k[0] = local_k0 + self_coord[0] * local_dim[0];
for (int local_k1 = 0; local_k1 < local_dim[1]; ++local_k1) {
k[1] = local_k1 + self_coord[1] * local_dim[1];
for (int local_k2 = 0; local_k2 < local_dim[2]; ++local_k2) {
k[2] = local_k2 + self_coord[2] * local_dim[2];
phi[index] = m_green[index] * rho[index];
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
}
///
// Solve for the gradient of the potential along the given axis by
// applying the derivative Green's function to the density (in
// k-space)
// axis the axis along which to take the gradient
// rho density (input)
// grad_phi the gradient of the potential (output)
///
void kspace_solve_gradient(int axis, const complex_t *rho, complex_t *grad_phi)
{
int k[3];
int index;
initialize_greens_function();
int local_dim[3];
int self_coord[3];
#if defined(FFTW3) && defined(PENCIL)
self_coord[0]=self_2d_z(0);
self_coord[1]=self_2d_z(1);
self_coord[2]=self_2d_z(2);
local_dim[0]=local_ng_2d_z(0);
local_dim[1]=local_ng_2d_z(1);
local_dim[2]=local_ng_2d_z(2);
#else
self_coord[0]=self_1d(0);
self_coord[1]=self_1d(1);
self_coord[2]=self_1d(2);
local_dim[0]=local_ng_1d(0);
local_dim[1]=local_ng_1d(1);
local_dim[2]=local_ng_1d(2);
#endif
index = 0;
for (int local_k0 = 0; local_k0 < local_dim[0]; ++local_k0) {
k[0] = local_k0 + self_coord[0] * local_dim[0];
for (int local_k1 = 0; local_k1 < local_dim[1]; ++local_k1) {
k[1] = local_k1 + self_coord[1] * local_dim[1];
for (int local_k2 = 0; local_k2 < local_dim[2]; ++local_k2) {
k[2] = local_k2 + self_coord[2] * local_dim[2];
grad_phi[index] = I * (- m_gradient[k[axis]]) * m_green[index] * rho[index];
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
}
///
// Allocate and pre-calculate isotropic Green's function and
// gradient operator.
///
virtual void initialize_greens_function() = 0;
protected:
double max(double a, double b) { return a > b ? a : b; }
std::vector<double> m_green; // green's function
std::vector<double> m_gradient; //imaginary part of the gradient in grid units
std::vector<double> m_pk_cic;
std::vector<double> m_pk_weight;
std::vector<int> m_pk_ksq;
std::vector<complex_t, bigchunk_allocator<complex_t> > m_buf1;
std::vector<complex_t, bigchunk_allocator<complex_t> > m_buf2;
std::vector<complex_t, bigchunk_allocator<complex_t> > m_buf3;
#if defined(FFTW3) && defined(PENCIL)
fftw_plan m_plan_f_x;
fftw_plan m_plan_f_y;
fftw_plan m_plan_f_z;
fftw_plan m_plan_b_x;
fftw_plan m_plan_b_y;
fftw_plan m_plan_b_z;
#elif defined(FFTW3)
fftw_plan m_plan_f;
fftw_plan m_plan_b;
#else
fftwnd_mpi_plan m_plan_f;
fftwnd_mpi_plan m_plan_b;
#endif
bool m_greens_functions_initialized;
};
///
// Poison solver using a 2nd-order discrete Green's function, and a
// 2nd-order derivative.
//
// G(k) = 1 / (2 * (Sum_i cos(2 pi k_i / n) - 3))
// (D_i f)(k) = ( - i * sin(2 pi k / n) / (2 pi / n) )* f(k)
///
class SolverDiscrete : public SolverBase {
public:
SolverDiscrete(MPI_Comm comm, int ng)
: SolverBase(comm, ng)
{
}
SolverDiscrete(MPI_Comm comm, std::vector<int> n)
: SolverBase(comm, n)
{
}
///
// Allocate and pre-calculate isotropic Green's function (1D
// distribution) and trigonometric factors:
// 1 / (2 * (Sum_i cos(2 pi k_i / n) - 3))
///
void initialize_greens_function()
{
double kstep;
int index;
int k[3];
std::vector<double> cosine;
if (m_greens_functions_initialized) {
return;
}
m_greens_functions_initialized = true;
m_green.resize(local_size());
m_gradient.resize(m_d.n[0]);
cosine.resize(m_d.n[0]);
// cache trigonometric factors and imaginary part of gradient
kstep = 2.0 * pi / (double) m_d.n[0];
for (int kk = 0; kk < m_d.n[0]; ++kk) {
cosine[kk] = cos(kk * kstep);
m_gradient[kk] = sin(kk * kstep); // imaginary part of gradient
}
// cache isotropic Green's function (1D or 2D distribution)
int local_dim[3];
int self_coord[3];
#if defined(FFTW3) && defined(PENCIL)
self_coord[0]=self_2d_z(0);
self_coord[1]=self_2d_z(1);
self_coord[2]=self_2d_z(2);
local_dim[0]=local_ng_2d_z(0);
local_dim[1]=local_ng_2d_z(1);
local_dim[2]=local_ng_2d_z(2);
#else
self_coord[0]=self_1d(0);
self_coord[1]=self_1d(1);
self_coord[2]=self_1d(2);
local_dim[0]=local_ng_1d(0);
local_dim[1]=local_ng_1d(1);
local_dim[2]=local_ng_1d(2);
#endif
index = 0;
double coeff = 0.5 / double(global_size());
for (int local_k0 = 0; local_k0 < local_dim[0]; ++local_k0) {
k[0] = local_k0 + self_coord[0] * local_dim[0];
for (int local_k1 = 0; local_k1 < local_dim[1]; ++local_k1) {
k[1] = local_k1 + self_coord[1] * local_dim[1];
for (int local_k2 = 0; local_k2 < local_dim[2]; ++local_k2) {
k[2] = local_k2 + self_coord[2] * local_dim[2];
m_green[index] = coeff / (cosine[k[0]] + cosine[k[1]] + cosine[k[2]] - 3.0);
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
// handle the pole
if (self() == 0) {
m_green[0] = 0.0;
}
}
};
///
// Poison solver using a 6th-order discrete Green's function with a
// Gaussian noise-quieting filter function, and a 4th-order
// derivative.
//
// G(k) = W(k) * 45/128 / Sum_i [ cos (2 pi k / n)
// - 5/64 cos(4 pi k / n)
// + 1/1024 cos(8 pi k_i / n)
// - 945/1024 ]
// or
// G(k) = W(k) / Sum_i ( a0 + a1 cos (2 pi k / n)
// + a2 cos (4 pi k / n)
// + a3 cos (8 pi k / n) )
// where
// a0 = -21/8
// a1 = 128/45
// a2 = -2/9
// a3 = 1/360
// satisfying
// a0 + a1 + a2 + a3 = 0
// (a1 + 4 a2 + 16 a3) = 2
// and
// W(k) = exp( - k^2 sigma^2 / 4) [ sin(k/2) / (k/2)]^n_s
// sigma = 0.8
// n_s = 3
//
// The gradient operator (imaginary part, grid units) is
// (gradient f)(k) = b1 sin(2 pi k / n) + b2 sin( 4 pi k / n) f(k)
// where
// b1 = 4/3
// b2 = -1/6
// satisfying
// b1 + 2 b2 = 1
// b1 + 8 b2 = 0
///
class SolverQuiet : public SolverBase {
public:
SolverQuiet(MPI_Comm comm, int ng)
: SolverBase(comm, ng)
{
}
SolverQuiet(MPI_Comm comm, std::vector<int> n)
: SolverBase(comm, n)
{
}
///
// Allocate and pre-calculate isotropic Green's function (1D
// distribution) and trigonometric factors:
// 1 / (2 * (Sum_i cos(2 pi k_i / n) - 3))
///
void initialize_greens_function()
{
double kstep;
int index;
int k[3];
std::vector<double> c1;
std::vector<double> c2;
std::vector<double> c3;
std::vector<double> filter;
std::vector<double> kperiodic;
double const a0 = - 21.0 / 8.0;
double const a1 = 128.0 / 45.0;
double const a2 = - 2.0 / 9.0;
double const a3 = 1.0 / 360.0;
double const b1 = 4.0 / 3.0;
double const b2 = - 1.0 / 6.0;
double const sigma = 0.8;
double const ns = 3.0;
int ng = m_d.n[0];
if (m_greens_functions_initialized) {
return;
}
// check Taylor series coefficent conditions
assert(fabs(a0 + a1 + a2 + a3) < 1.0e-12);
assert(fabs(a1 + 4 * a2 + 16 * a3 - 2.0) < 1.0e-12);
assert(fabs(b1 + 2 * b2 - 1.0) < 1.0e-12);
assert(fabs(b1 + 8 * b2) < 1.0e-12);
m_greens_functions_initialized = true;
m_green.resize(local_size());
m_gradient.resize(ng);
c1.resize(ng);
c2.resize(ng);
c3.resize(ng);
kperiodic.resize(ng);
filter.resize(ng);
// cache k array with the correct periodicity
// cache trigonometric factors and imaginary part of gradient
kstep = 2.0 * pi / static_cast<double>(ng);
for (int kk = 0; kk < ng; ++kk) {
c1[kk] = cos(kk * kstep);
c2[kk] = cos(2 * kk * kstep);
c3[kk] = cos(4 * kk * kstep);
if (kk < ng / 2) {
kperiodic[kk] = kk * kstep;
kperiodic[kk + ng / 2] = (kk - ng / 2) * kstep;
}
}
// cache Green's function filter, and 4th order k-space gradient operator
filter[0] = 1.0;
m_gradient[0] = 1.0;
for (int kk = 1; kk < ng; ++kk) {
filter[kk] = exp(- 0.25 * sigma * sigma * kperiodic[kk] * kperiodic[kk])
* pow(sin(0.5 * kperiodic[kk]) / (0.5 * kperiodic[kk]), ns);
m_gradient[kk] = (b1 * sin(kperiodic[kk]) + b2 * sin(2 * kperiodic[kk]));
}
// cache isotropic Green's function (1D or 2D distribution)
int local_dim[3];
int self_coord[3];
#if defined(FFTW3) && defined(PENCIL)
self_coord[0]=self_2d_z(0);
self_coord[1]=self_2d_z(1);
self_coord[2]=self_2d_z(2);
local_dim[0]=local_ng_2d_z(0);
local_dim[1]=local_ng_2d_z(1);
local_dim[2]=local_ng_2d_z(2);
#else
self_coord[0]=self_1d(0);
self_coord[1]=self_1d(1);
self_coord[2]=self_1d(2);
local_dim[0]=local_ng_1d(0);
local_dim[1]=local_ng_1d(1);
local_dim[2]=local_ng_1d(2);
#endif
index = 0;
double coeff = 1.0 / double(global_size());
for (int local_k0 = 0; local_k0 < local_dim[0]; ++local_k0) {
k[0] = local_k0 + self_coord[0] * local_dim[0];
double d0 = a0 + a1 * c1[k[0]] + a2 * c2[k[0]] + a3 * c3[k[0]];
for (int local_k1 = 0; local_k1 < local_dim[1]; ++local_k1) {
k[1] = local_k1 + self_coord[1] * local_dim[1];
double d1 = a0 + a1 * c1[k[1]] + a2 * c2[k[1]] + a3 * c3[k[1]];
for (int local_k2 = 0; local_k2 < local_dim[2]; ++local_k2) {
k[2] = local_k2 + self_coord[2] * local_dim[2];
double filt = coeff * filter[k[0]] * filter[k[1]] * filter[k[2]];
double d2 = a0 + a1 * c1[k[2]] + a2 * c2[k[2]] + a3 * c3[k[2]];
m_green[index] = filt / (d0 + d1 + d2);
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
// handle the pole
if (self() == 0) {
m_green[0] = 0.0;
}
}
};
///
// Poisson solver class using continuum Green's function:
// - 1 / k^2
///
class SolverContinuum : public SolverBase {
public:
virtual void this_class_is_not_yet_tested_and_should_not_be_used() = 0;
SolverContinuum(MPI_Comm comm, int ng)
: SolverBase(comm, ng)
{
}
SolverContinuum(MPI_Comm comm, std::vector<int> n)
: SolverBase(comm, n)
{
}
///
// Allocate and pre-calculate isotropic Green's function and gradient.
///
void initialize_greens_function()
{
double kstep;
double coeff;
int index;
int k[3];
int ng = m_d.n[0];
if (m_greens_functions_initialized) {
return;
}
m_greens_functions_initialized = true;
m_green.resize(local_size());
m_gradient.resize(ng);
// cache imaginary part of gradient, imposing symmetries by hand
kstep = 2.0 * pi / (double) ng;
for (int kk = 0; kk < ng / 2; ++kk) {
m_gradient[kk] = kk * kstep;
m_gradient[kk + ng / 2] = (kk - ng / 2) * kstep;
}
// cache isotropic Green's function (1D or 2D distribution)
int local_dim[3];
int self_coord[3];
#if defined(FFTW3) && defined(PENCIL)
self_coord[0]=self_2d_z(0);
self_coord[1]=self_2d_z(1);
self_coord[2]=self_2d_z(2);
local_dim[0]=local_ng_2d_z(0);
local_dim[1]=local_ng_2d_z(1);
local_dim[2]=local_ng_2d_z(2);
#else
self_coord[0]=self_1d(0);
self_coord[1]=self_1d(1);
self_coord[2]=self_1d(2);
local_dim[0]=local_ng_1d(0);
local_dim[1]=local_ng_1d(1);
local_dim[2]=local_ng_1d(2);
#endif
index = 0;
coeff = -1.0 / double(global_size());
for (int local_k0 = 0; local_k0 < local_dim[0]; ++local_k0) {
k[0] = local_k0 + self_coord[0] * local_dim[0];
double k0sq = m_gradient[k[0]] * m_gradient[k[0]];
for (int local_k1 = 0; local_k1 < local_dim[1]; ++local_k1) {
k[1] = local_k1 + self_coord[1] * local_dim[1];
double k1sq = m_gradient[k[1]] * m_gradient[k[1]];
for (int local_k2 = 0; local_k2 < local_dim[2]; ++local_k2) {
k[2] = local_k2 + self_coord[2] * local_dim[2];
double k2sq = m_gradient[k[2]] * m_gradient[k[2]];
m_green[index] = coeff / (k0sq + k1sq + k2sq);
index++;
}
index += m_d.padding[2];
}
index += m_d.padding[1];
}
// handle the pole
if (self() == 0) {
m_green[0] = 0.0;
}
}
};
#endif