!======================================================================================= ! ! Authors: Jan Eitzinger (je), jan.eitzinger@fau.de ! Copyright (c) 2019 RRZE, University Erlangen-Nuremberg ! ! Permission is hereby granted, free of charge, to any person obtaining a copy ! of this software and associated documentation files (the "Software"), to deal ! in the Software without restriction, including without limitation the rights ! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ! copies of the Software, and to permit persons to whom the Software is ! furnished to do so, subject to the following conditions: ! ! The above copyright notice and this permission notice shall be included in all ! copies or substantial portions of the Software. ! ! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ! SOFTWARE.#include ! !======================================================================================= module constants implicit none integer, parameter :: sp = kind(0.0e0) integer, parameter :: dp = kind(0.0d0) end module constants module timing public getTimeStamp contains function getTimeStamp () result(seconds) use iso_fortran_env, only: int64, real64 implicit none doubleprecision :: seconds integer(int64) :: counter, count_step real(real64) :: ts call system_clock(counter, count_step) seconds = counter / real(count_step, real64) end function getTimeStamp end module timing module util use constants integer :: size_x, size_y contains subroutine str2int(str,int,stat) implicit none ! Arguments character(len=*),intent(in) :: str integer,intent(out) :: int integer,intent(out) :: stat read(str,*,iostat=stat) int end subroutine str2int subroutine copyGrid(src, dst) implicit none real(kind=dp), allocatable, intent(in) :: src(:,:) real(kind=dp), allocatable, intent(inout) :: dst(:,:) integer :: i,j !$OMP PARALLEL DO do j = 1, size_y do i = 1, size_x dst(i,j) = src(i,j) end do end do end subroutine copyGrid end module util module solver use timing use constants use util use likwid integer :: itermax real(kind=dp) :: tolerance= 1d-8 real(kind=dp) :: h_x, h_y contains subroutine init (x, u_sine, rhs_sine) implicit none real(kind=dp), allocatable, intent(inout) :: x(:,:) real(kind=dp), allocatable, intent(inout) :: u_sine(:,:) real(kind=dp), allocatable, intent(inout) :: rhs_sine(:,:) real(kind=dp), parameter :: pi = 4.0d0*atan(1.0d0) integer :: i, j !$OMP PARALLEL DO do j = 1, size_y do i = 1, size_x ! call RANDOM_NUMBER (rnumber) x(i,j) = (dble(j-1)*size_x+i-1)/dble(size_x*size_y) ! rnumber/0.9999 end do end do !$OMP PARALLEL DO do j = 1, size_y do i = 1, size_x u_sine(i,j) = sin(pi * dble(i-1) * h_x) * sin(pi * dble(j-1) *h_y) end do end do !print *,u_sine !$OMP PARALLEL DO do j = 1, size_y do i = 1, size_x rhs_sine(i,j) = 2.d0 * pi * pi * sin(pi * dble(i-1) * h_x) * sin(pi * dble(j-1) *h_y) end do end do !print *,rhs_sine end subroutine init function axpby (res, a, x, b, y) result(seconds) implicit none real(kind=dp) :: seconds real(kind=dp), allocatable, intent(inout) :: res(:,:) real(kind=dp), intent(in) :: a real(kind=dp), allocatable, intent(inout) :: x(:,:) real(kind=dp), intent(in) :: b real(kind=dp), allocatable, intent(inout) :: y(:,:) real(kind=dp) :: S, E integer :: i, j S = getTimeStamp() !$OMP PARALLEL call likwid_markerStartRegion("axpby") !$OMP DO do j = 1, size_y do i = 1, size_x res(i,j) = (a * x(i,j)) + (b * y(i,j)) end do end do !$OMP END DO call likwid_markerStopRegion("axpby") !$OMP END PARALLEL E = getTimeStamp() seconds = E-S end function axpby function dotProduct (x, y, l2sq) result(seconds) implicit none real(kind=dp) :: seconds,l2_sq real(kind=dp), allocatable, intent(in) :: x(:,:) real(kind=dp), allocatable, intent(in) :: y(:,:) real(kind=dp), intent(inout) :: l2sq real(kind=dp) :: S, E integer :: i, j l2_sq = 0.d0 S = getTimeStamp() !$OMP PARALLEL DO REDUCTION(+:l2_sq) do j = 1, size_y do i = 1, size_x l2_sq = l2_sq + x(i,j) * y(i,j) end do end do E = getTimeStamp() l2sq = l2_sq !print *,"DP = ",l2sq seconds = E-S end function dotProduct function applyStencil (res, u) result(seconds) implicit none real(kind=dp) :: seconds real(kind=dp), allocatable, intent(inout) :: res(:,:) real(kind=dp), allocatable, intent(in) :: u(:,:) real(kind=dp) :: w_x, w_y, w_c real(kind=dp) :: S, E integer :: i, j w_x = 1.0D00 / (h_x * h_x); w_y = 1.0D00 / (h_y * h_y); w_c = 2.0D00 * (w_x + w_y); S = getTimeStamp() !$OMP PARALLEL DO do j = 2, size_y-1 do i = 2, size_x-1 res(i,j) = & w_c * u(i,j) - & w_y * (u(i,j+1) + u(i,j-1)) - & w_x * (u(i+1,j) + u(i-1,j)) end do end do E = getTimeStamp() !print *,"---------------------" !print *,res(2:size_x-1,2:size_y-1) !print *,"---------------------" seconds = E-S end function applyStencil subroutine computeResidual (residual, x, rhs_sine, res_start) implicit none real(kind=dp), allocatable, intent(inout) :: residual(:,:) real(kind=dp), allocatable, intent(inout) :: x(:,:) real(kind=dp), allocatable, intent(inout) :: rhs_sine(:,:) real(kind=dp), intent(inout) :: res_start real(kind=dp) :: duration duration = applyStencil(residual, x) duration = axpby(residual, 1.0D00, rhs_sine, -1.0D00, residual) duration = dotProduct(residual, residual, res_start) end subroutine computeResidual function cg (x, b, iter_end) result(seconds) implicit none real(kind=dp) :: seconds real(kind=dp), allocatable, intent(inout) :: x(:,:) real(kind=dp), allocatable, intent(inout) :: b(:,:) integer, intent(inout) :: iter_end real(kind=dp), allocatable :: p(:,:), v(:,:), r(:,:) real(kind=dp) :: S, E real(kind=dp) :: lambda=0.0D00 real(kind=dp) :: alpha_0=0.0D00, alpha_1=0.0D00 real(kind=dp) :: tolSquared real(kind=dp) :: duration integer :: i, j integer :: iter=0 tolSquared = tolerance * tolerance allocate(p(size_x, size_y)) allocate(v(size_x, size_y)) allocate(r(size_x, size_y)) call computeResidual(p, x, b, alpha_0) call copyGrid(p, r) !print *,"Before loop: ",iter, alpha_0 S = getTimeStamp() do while ( (iter < itermax) .and. (alpha_0 > tolSquared) ) call likwid_markerStartRegion("CG") duration = applyStencil(v, p) duration = dotProduct(v, p, lambda) lambda = alpha_0/lambda !print *,"lambda = ",lambda duration = axpby(x, 1.0D00, x, lambda, p) duration = axpby(r, 1.0D00, r, -lambda, v) duration = dotProduct(r, r, alpha_1) duration = axpby(p, 1.0D00, r, alpha_1/alpha_0, p) alpha_0 = alpha_1 print *,"iter = ",iter," res = ", alpha_0 iter = iter + 1 call likwid_markerStopRegion("CG") end do E = getTimeStamp() iter_end = iter print *,iter_end, itermax, alpha_0, tolSquared seconds = E-S end function cg end module solver program mfcg use timing use constants use solver use util use likwid implicit none integer :: argc character(len=32) :: arg real(kind=dp), allocatable :: u_sine(:,:), rhs_sine(:,:), x(:,:), residual(:,:) real(kind=dp) :: res_start, err_start = 0.0D00 real(kind=dp) :: res_sine_cg, err_sine_cg = 0.0D00 integer :: iter_sine_cg integer :: stat double precision :: duration !$ INTEGER omp_get_num_threads !$ EXTERNAL omp_get_num_threads call likwid_markerInit() !$omp parallel call likwid_markerRegisterRegion("CG") !$omp end parallel itermax = 50 argc = command_argument_count() if (argc > 1) then call get_command_argument(1, arg) call str2int(trim(arg), size_x, stat) call get_command_argument(2, arg) call str2int(trim(arg), size_y, stat) if (argc == 3) then call get_command_argument(3, arg) call str2int(trim(arg), itermax, stat) end if else print *,"Usage: mfcg " stop end if h_x = 1.0D00/(dble(size_x)-1.0D00) h_y = 1.0D00/(dble(size_y)-1.0D00) allocate(u_sine(size_x, size_y)) allocate(rhs_sine(size_x, size_y)) allocate(x(size_x, size_y)) allocate(residual(size_x, size_y)) call init(x, u_sine, rhs_sine) !$omp parallel !$omp master print *,'----------------------------------------------' !$ print *,'Number of Threads = ',OMP_GET_NUM_THREADS() !$omp end master !$omp end parallel PRINT *,'----------------------------------------------' ! compute start error and residual duration = axpby(residual, 1.0D00, u_sine, -1.0D00, x) duration = dotProduct(residual, residual, err_start) call computeResidual(residual, x, rhs_sine, res_start) duration = cg( x, rhs_sine, iter_sine_cg) print *,"-------------------------------------------------------------" print *,"CG iterations = ",iter_sine_cg print *,"Performance CG = ",dble(iter_sine_cg)*size_x*size_y*1e-6/duration print *,"-------------------------------------------------------------" ! compute end error and residual duration = axpby(residual, 1.0D00, u_sine, -1.0D00, x) duration = dotProduct(residual, residual, err_sine_cg) call computeResidual(residual, x, rhs_sine, res_sine_cg) call likwid_markerClose() end program mfcg