| c--------------------------------------------------------------------- |
| c--------------------------------------------------------------------- |
| |
| subroutine copy_faces |
| |
| c--------------------------------------------------------------------- |
| c--------------------------------------------------------------------- |
| |
| c--------------------------------------------------------------------- |
| c |
| c This function copies the face values of a variable defined on a set |
| c of cells to the overlap locations of the adjacent sets of cells. |
| c Because a set of cells interfaces in each direction with exactly one |
| c other set, we only need to fill six different buffers. We could try to |
| c overlap communication with computation, by computing |
| c some internal values while communicating boundary values, but this |
| c adds so much overhead that it's not clearly useful. |
| c--------------------------------------------------------------------- |
| |
| include 'header.h' |
| include 'mpinpb.h' |
| |
| integer i, j, k, c, m, requests(0:11), p0, p1, |
| > p2, p3, p4, p5, b_size(0:5), ss(0:5), |
| > sr(0:5), error, statuses(MPI_STATUS_SIZE, 0:11) |
| |
| c--------------------------------------------------------------------- |
| c exit immediately if there are no faces to be copied |
| c--------------------------------------------------------------------- |
| if (no_nodes .eq. 1) then |
| call compute_rhs |
| return |
| endif |
| |
| ss(0) = start_send_east |
| ss(1) = start_send_west |
| ss(2) = start_send_north |
| ss(3) = start_send_south |
| ss(4) = start_send_top |
| ss(5) = start_send_bottom |
| |
| sr(0) = start_recv_east |
| sr(1) = start_recv_west |
| sr(2) = start_recv_north |
| sr(3) = start_recv_south |
| sr(4) = start_recv_top |
| sr(5) = start_recv_bottom |
| |
| b_size(0) = east_size |
| b_size(1) = west_size |
| b_size(2) = north_size |
| b_size(3) = south_size |
| b_size(4) = top_size |
| b_size(5) = bottom_size |
| |
| c--------------------------------------------------------------------- |
| c because the difference stencil for the diagonalized scheme is |
| c orthogonal, we do not have to perform the staged copying of faces, |
| c but can send all face information simultaneously to the neighboring |
| c cells in all directions |
| c--------------------------------------------------------------------- |
| if (timeron) call timer_start(t_bpack) |
| p0 = 0 |
| p1 = 0 |
| p2 = 0 |
| p3 = 0 |
| p4 = 0 |
| p5 = 0 |
| |
| do c = 1, ncells |
| |
| c--------------------------------------------------------------------- |
| c fill the buffer to be sent to eastern neighbors (i-dir) |
| c--------------------------------------------------------------------- |
| if (cell_coord(1,c) .ne. ncells) then |
| do k = 0, cell_size(3,c)-1 |
| do j = 0, cell_size(2,c)-1 |
| do i = cell_size(1,c)-2, cell_size(1,c)-1 |
| do m = 1, 5 |
| out_buffer(ss(0)+p0) = u(m,i,j,k,c) |
| p0 = p0 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| c--------------------------------------------------------------------- |
| c fill the buffer to be sent to western neighbors |
| c--------------------------------------------------------------------- |
| if (cell_coord(1,c) .ne. 1) then |
| do k = 0, cell_size(3,c)-1 |
| do j = 0, cell_size(2,c)-1 |
| do i = 0, 1 |
| do m = 1, 5 |
| out_buffer(ss(1)+p1) = u(m,i,j,k,c) |
| p1 = p1 + 1 |
| end do |
| end do |
| end do |
| end do |
| |
| endif |
| |
| c--------------------------------------------------------------------- |
| c fill the buffer to be sent to northern neighbors (j_dir) |
| c--------------------------------------------------------------------- |
| if (cell_coord(2,c) .ne. ncells) then |
| do k = 0, cell_size(3,c)-1 |
| do j = cell_size(2,c)-2, cell_size(2,c)-1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| out_buffer(ss(2)+p2) = u(m,i,j,k,c) |
| p2 = p2 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| c--------------------------------------------------------------------- |
| c fill the buffer to be sent to southern neighbors |
| c--------------------------------------------------------------------- |
| if (cell_coord(2,c).ne. 1) then |
| do k = 0, cell_size(3,c)-1 |
| do j = 0, 1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| out_buffer(ss(3)+p3) = u(m,i,j,k,c) |
| p3 = p3 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| c--------------------------------------------------------------------- |
| c fill the buffer to be sent to top neighbors (k-dir) |
| c--------------------------------------------------------------------- |
| if (cell_coord(3,c) .ne. ncells) then |
| do k = cell_size(3,c)-2, cell_size(3,c)-1 |
| do j = 0, cell_size(2,c)-1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| out_buffer(ss(4)+p4) = u(m,i,j,k,c) |
| p4 = p4 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| c--------------------------------------------------------------------- |
| c fill the buffer to be sent to bottom neighbors |
| c--------------------------------------------------------------------- |
| if (cell_coord(3,c).ne. 1) then |
| do k=0, 1 |
| do j = 0, cell_size(2,c)-1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| out_buffer(ss(5)+p5) = u(m,i,j,k,c) |
| p5 = p5 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| c--------------------------------------------------------------------- |
| c cell loop |
| c--------------------------------------------------------------------- |
| end do |
| if (timeron) call timer_stop(t_bpack) |
| |
| if (timeron) call timer_start(t_exch) |
| call mpi_irecv(in_buffer(sr(0)), b_size(0), |
| > dp_type, successor(1), WEST, |
| > comm_rhs, requests(0), error) |
| call mpi_irecv(in_buffer(sr(1)), b_size(1), |
| > dp_type, predecessor(1), EAST, |
| > comm_rhs, requests(1), error) |
| call mpi_irecv(in_buffer(sr(2)), b_size(2), |
| > dp_type, successor(2), SOUTH, |
| > comm_rhs, requests(2), error) |
| call mpi_irecv(in_buffer(sr(3)), b_size(3), |
| > dp_type, predecessor(2), NORTH, |
| > comm_rhs, requests(3), error) |
| call mpi_irecv(in_buffer(sr(4)), b_size(4), |
| > dp_type, successor(3), BOTTOM, |
| > comm_rhs, requests(4), error) |
| call mpi_irecv(in_buffer(sr(5)), b_size(5), |
| > dp_type, predecessor(3), TOP, |
| > comm_rhs, requests(5), error) |
| |
| call mpi_isend(out_buffer(ss(0)), b_size(0), |
| > dp_type, successor(1), EAST, |
| > comm_rhs, requests(6), error) |
| call mpi_isend(out_buffer(ss(1)), b_size(1), |
| > dp_type, predecessor(1), WEST, |
| > comm_rhs, requests(7), error) |
| call mpi_isend(out_buffer(ss(2)), b_size(2), |
| > dp_type,successor(2), NORTH, |
| > comm_rhs, requests(8), error) |
| call mpi_isend(out_buffer(ss(3)), b_size(3), |
| > dp_type,predecessor(2), SOUTH, |
| > comm_rhs, requests(9), error) |
| call mpi_isend(out_buffer(ss(4)), b_size(4), |
| > dp_type,successor(3), TOP, |
| > comm_rhs, requests(10), error) |
| call mpi_isend(out_buffer(ss(5)), b_size(5), |
| > dp_type,predecessor(3), BOTTOM, |
| > comm_rhs,requests(11), error) |
| |
| |
| call mpi_waitall(12, requests, statuses, error) |
| if (timeron) call timer_stop(t_exch) |
| |
| c--------------------------------------------------------------------- |
| c unpack the data that has just been received; |
| c--------------------------------------------------------------------- |
| if (timeron) call timer_start(t_bpack) |
| p0 = 0 |
| p1 = 0 |
| p2 = 0 |
| p3 = 0 |
| p4 = 0 |
| p5 = 0 |
| |
| do c = 1, ncells |
| |
| if (cell_coord(1,c) .ne. 1) then |
| do k = 0, cell_size(3,c)-1 |
| do j = 0, cell_size(2,c)-1 |
| do i = -2, -1 |
| do m = 1, 5 |
| u(m,i,j,k,c) = in_buffer(sr(1)+p0) |
| p0 = p0 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| if (cell_coord(1,c) .ne. ncells) then |
| do k = 0, cell_size(3,c)-1 |
| do j = 0, cell_size(2,c)-1 |
| do i = cell_size(1,c), cell_size(1,c)+1 |
| do m = 1, 5 |
| u(m,i,j,k,c) = in_buffer(sr(0)+p1) |
| p1 = p1 + 1 |
| end do |
| end do |
| end do |
| end do |
| end if |
| |
| if (cell_coord(2,c) .ne. 1) then |
| do k = 0, cell_size(3,c)-1 |
| do j = -2, -1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| u(m,i,j,k,c) = in_buffer(sr(3)+p2) |
| p2 = p2 + 1 |
| end do |
| end do |
| end do |
| end do |
| |
| endif |
| |
| if (cell_coord(2,c) .ne. ncells) then |
| do k = 0, cell_size(3,c)-1 |
| do j = cell_size(2,c), cell_size(2,c)+1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| u(m,i,j,k,c) = in_buffer(sr(2)+p3) |
| p3 = p3 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| if (cell_coord(3,c) .ne. 1) then |
| do k = -2, -1 |
| do j = 0, cell_size(2,c)-1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| u(m,i,j,k,c) = in_buffer(sr(5)+p4) |
| p4 = p4 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| if (cell_coord(3,c) .ne. ncells) then |
| do k = cell_size(3,c), cell_size(3,c)+1 |
| do j = 0, cell_size(2,c)-1 |
| do i = 0, cell_size(1,c)-1 |
| do m = 1, 5 |
| u(m,i,j,k,c) = in_buffer(sr(4)+p5) |
| p5 = p5 + 1 |
| end do |
| end do |
| end do |
| end do |
| endif |
| |
| c--------------------------------------------------------------------- |
| c cells loop |
| c--------------------------------------------------------------------- |
| end do |
| if (timeron) call timer_stop(t_bpack) |
| |
| c--------------------------------------------------------------------- |
| c do the rest of the rhs that uses the copied face values |
| c--------------------------------------------------------------------- |
| call compute_rhs |
| |
| return |
| end |