MPI-SCATCI  2.0
An MPI version of SCATCI
Parallelization_module.F90
1 ! Copyright 2019
2 !
3 ! For a comprehensive list of the developers that contributed to these codes
4 ! see the UK-AMOR website.
5 !
6 ! This file is part of UKRmol-in (UKRmol+ suite).
7 !
8 ! UKRmol-in is free software: you can redistribute it and/or modify
9 ! it under the terms of the GNU General Public License as published by
10 ! the Free Software Foundation, either version 3 of the License, or
11 ! (at your option) any later version.
12 !
13 ! UKRmol-in is distributed in the hope that it will be useful,
14 ! but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ! GNU General Public License for more details.
17 !
18 ! You should have received a copy of the GNU General Public License
19 ! along with UKRmol-in (in source/COPYING). Alternatively, you can also visit
20 ! <https://www.gnu.org/licenses/>.
21 
29 module parallelization_module
30 
31 #ifdef usempi
32  use mpi
33 #endif
34  use blas_lapack_gbl, only: blasint
35  use mpi_gbl, only: mpiint, myrank, nprocs, local_rank, local_nprocs, shared_communicator
36 
37  implicit none
38 
43  type :: processgrid
44  integer(blasint) :: wcntxt
45  integer(blasint) :: wprows
46  integer(blasint) :: wpcols
47  integer(blasint) :: mywrow
48  integer(blasint) :: mywcol
49 
50  integer :: igroup
51  integer :: ngroup
52 
53  integer :: gprocs
54  integer :: grank
55  integer :: lprocs
56  integer :: lrank
57 
58  integer(blasint) :: gcntxt
59  integer(blasint) :: gprows
60  integer(blasint) :: gpcols
61  integer(blasint) :: mygrow
62  integer(blasint) :: mygcol
63 
64  integer(mpiint) :: gcomm
65  integer(mpiint) :: lcomm
66 
67  integer, allocatable :: groupnprocs(:)
68 
69  logical :: sequential
70  contains
71  procedure, public, pass :: setup => setup_process_grid
72  procedure, public, pass :: is_my_group_work
73  procedure, public, pass :: which_group_is_work
74  procedure, public, pass :: group_master_world_rank
75  procedure, public, pass :: summarize
76  procedure, private, nopass :: square
77  end type processgrid
78 
79  ! global instance of the process grid
80  type(processgrid) :: process_grid
81 
82 contains
83 
99  subroutine setup_process_grid (this, ngroup, sequential)
100 
101  use const_gbl, only: stdout
102 
103  class(processgrid), intent(inout) :: this
104  integer, intent(in) :: ngroup
105  logical, intent(in) :: sequential
106 
107  integer, allocatable :: groupstarts(:), groupends(:)
108  integer(mpiint) :: n, color, ierr
109  integer(blasint) :: cntxt, rows, cols
110  integer :: i, j, k, rank
111  integer(blasint), allocatable :: groupmap(:,:)
112 
113  allocate (this % groupnprocs(ngroup), groupstarts(ngroup), groupends(ngroup))
114 
115  ! no context splitting needed if too few processes, or if concurrent diagonalizations are not desired
116  this % sequential = (sequential .or. ngroup == 1 .or. nprocs < ngroup)
117 
118  ! default initialization
119  this % gcomm = 0
120  this % lcomm = shared_communicator
121  this % igroup = 0
122  this % ngroup = merge(1, ngroup, this % sequential)
123  this % gprocs = nprocs
124  this % grank = myrank
125  this % lprocs = local_nprocs
126  this % lrank = local_rank
127  this % groupnprocs = nprocs
128 
129 #ifdef usempi
130 # ifdef scalapack
131  ! allocate BLACS context for the world
132  call this % square(int(nprocs), this % wprows, this % wpcols)
133  call blacs_get(-1_blasint, 0_blasint, this % wcntxt)
134  call blacs_gridinit(this % wcntxt, 'r', this % wprows, this % wpcols)
135  call blacs_gridinfo(this % wcntxt, this % wprows, this % wpcols, this % mywrow, this % mywcol)
136 # endif
137 
138  if (this % sequential) then
139  this % gcntxt = this % wcntxt
140  this % gcomm = mpi_comm_world
141  this % groupnprocs = nprocs
142  this % gprows = this % wprows
143  this % gpcols = this % wpcols
144  this % mygrow = this % mywrow
145  this % mygcol = this % mywcol
146  return
147  end if
148 
149  ! spread processes among groups round-robin (on an uneven split the leading groups get one extra process each)
150  this % groupnprocs(:) = 0
151  do i = 1, nprocs
152  j = 1 + mod(i - 1, this % ngroup)
153  this % groupnprocs(j) = this % groupnprocs(j) + 1
154  end do
155  groupstarts(1) = 0
156  groupends(this % ngroup) = nprocs - 1
157  do i = 1, this % ngroup - 1
158  groupstarts(i + 1) = groupstarts(i) + this % groupnprocs(i)
159  groupends(i) = groupstarts(i + 1) - 1
160  end do
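     ! (example: with nprocs = 10 and ngroup = 4 the two loops above yield groupnprocs = (/3,3,2,2/),
     !  groupstarts = (/0,3,6,8/) and groupends = (/2,5,7,9/); world ranks 0-2 form group 0,
     !  ranks 3-5 group 1, ranks 6-7 group 2 and ranks 8-9 group 3)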
161 
162  ! find out which group this process belongs to (store zero-based index)
163  do i = 1, this % ngroup
164  if (groupstarts(i) <= myrank .and. myrank <= groupends(i)) then
165  this % igroup = i - 1
166  exit
167  end if
168  end do
169 
170  ! create an MPI communicator for this group and split it further into shared-memory sub-communicators on individual nodes
171  color = this % igroup
172  call mpi_comm_split(mpi_comm_world, color, myrank, this % gcomm, ierr)
173  call mpi_comm_split_type(this % gcomm, mpi_comm_type_shared, myrank, mpi_info_null, this % lcomm, ierr)
174  call mpi_comm_rank(this % gcomm, n, ierr); this % grank = n
175  call mpi_comm_size(this % gcomm, n, ierr); this % gprocs = n
176  call mpi_comm_rank(this % lcomm, n, ierr); this % lrank = n
177  call mpi_comm_size(this % lcomm, n, ierr); this % lprocs = n
178 
179 # ifdef scalapack
180  ! Allocate a BLACS context for the group. This has to be done in a separate call for each group,
181  ! rather than only once with group-dependent contents of `groupmap`, because `blacs_gridmap` uses
182  ! `MPI_Group_incl`, which does not support creating several groups at once.
183 
184  write (stdout, '(/," Creating BLACS groups")')
185  write (stdout, '( " ---------------------")')
186 
187  do i = 1, this % ngroup
188 
189  ! populate the matrix of world ranks that belong to this group's grid
190  call this % square(this % groupnprocs(i), rows, cols)
191  allocate (groupmap(rows, cols))
192  rank = 0
193  do k = 1, cols
194  do j = 1, rows
195  groupmap(j, k) = groupstarts(i) + rank
196  rank = rank + 1
197  end do
198  end do
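     ! (example: a group of 6 processes starting at world rank 3 is given rows = 3, cols = 2,
     !  so groupmap is filled column by column with ranks 3,4,5 in column 1 and 6,7,8 in column 2)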
199 
200  ! split off a new BLACS context in a collective call
201  call blacs_get(-1_blasint, 0_blasint, cntxt)
202  write (stdout, '(" Group #",I0," size ",I0,"x",I0," grid ",*(1x,I0))') i, rows, cols, groupmap
203  call blacs_gridmap(cntxt, groupmap, rows, rows, cols)
204  deallocate (groupmap)
205 
206  ! if this group is mine, store the group information
207  if (i == this % igroup + 1) then
208  this % gprows = rows
209  this % gpcols = cols
210  this % gcntxt = cntxt
211  end if
212 
213  end do
214 
215  ! find out this process' position within its BLACS grid
216  call blacs_gridinfo(this % gcntxt, this % gprows, this % gpcols, this % mygrow, this % mygcol)
217 # endif
218 #endif
219 
220  end subroutine setup_process_grid
221 
222 
227  subroutine summarize (this)
228 
229  use const_gbl, only: stdout
230 
231  class(processgrid), intent(in) :: this
232 
233  integer :: i
234 
235  write (stdout, *)
236  write (stdout, '(1x,A)') 'Process grid'
237  write (stdout, '(1x,A)') '------------'
238  write (stdout, '(1x,A,L1)') 'Sequential diagonalizations: ', this % sequential
239  write (stdout, '(1x,A,I0)') 'Number of groups: ', this % ngroup
240  write (stdout, '(1x,A)', advance = 'no') 'Number of processes per group:'
241  do i = 1, this % ngroup
242  write (stdout, '(1x,I0)', advance = 'no') this % groupnprocs(i)
243  end do
244  write (stdout, *)
245  write (stdout, '(1x,A,I0)') 'This process belongs to group: ', this % igroup
246  write (stdout, '(1x,A,I0)') 'This group size: ', this % gprocs
247  write (stdout, '(1x,A,I0)') 'This group rank: ', this % grank
248  write (stdout, '(1x,A,I0)') 'Shared-memory sub-group size: ', this % lprocs
249  write (stdout, '(1x,A,I0)') 'Shared-memory sub-group rank: ', this % lrank
250  write (stdout, *)
251 
252  end subroutine summarize
253 
254 
262  logical function is_my_group_work (this, i) result (verdict)
263 
264  class(processgrid), intent(inout) :: this
265  integer, intent(in) :: i
266 
267  verdict = (this % which_group_is_work(i) == this % igroup)
268 
269  end function is_my_group_work
270 
271 
279  integer function which_group_is_work (this, i) result (igroup)
280 
281  class(processgrid), intent(inout) :: this
282  integer, intent(in) :: i
283 
284  igroup = mod(i - 1, this % ngroup)
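     ! (work items are distributed over the groups round-robin: with ngroup = 4, items 1, 5, 9, ...
     !  map to group 0, items 2, 6, 10, ... to group 1, and so on)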
285 
286  end function which_group_is_work
287 
288 
293  integer function group_master_world_rank (this, igroup) result (rank)
294 
295  class(processgrid), intent(inout) :: this
296  integer, intent(in) :: igroup
297 
298  rank = sum(this % groupnprocs(1:igroup))
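     ! (the group master is the first world rank in the group; e.g. with groupnprocs = (/3,3,2,2/)
     !  the master of group 2 is world rank 3 + 3 = 6)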
299 
300  end function group_master_world_rank
301 
302 
310  subroutine square (n, a, b)
311 
312  integer, intent(in) :: n
313  integer(blasint), intent(out) :: a, b
314 
315  integer :: i
316 
317  do i = 1, int(sqrt(real(n)) + 1)
318  if (mod(n, i) == 0) then
319  a = i
320  b = n / i
321  end if
322  end do
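     ! (examples: n = 12 gives a = 4, b = 3; n = 10 gives a = 2, b = 5; a prime n gives a = 1, b = n)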
323 
324  end subroutine square
325 
326 end module parallelization_module
parallelization_module::which_group_is_work
integer function which_group_is_work(this, i)
Find out which group a given work item will be processed by.
Definition: Parallelization_module.F90:280
parallelization_module::process_grid
type(processgrid) process_grid
Definition: Parallelization_module.F90:80
parallelization_module::summarize
subroutine summarize(this)
Write current grid layout to stdout.
Definition: Parallelization_module.F90:228
parallelization_module::group_master_world_rank
integer function group_master_world_rank(this, igroup)
Find out world rank of the master process of a given MPI group.
Definition: Parallelization_module.F90:294
parallelization_module::processgrid
MPI process grid layout.
Definition: Parallelization_module.F90:43
parallelization_module::is_my_group_work
logical function is_my_group_work(this, i)
Check whether this work-item is to be processed by this process' group.
Definition: Parallelization_module.F90:263
parallelization_module
Distribution of processes into a grid.
Definition: Parallelization_module.F90:29
parallelization_module::square
subroutine square(n, a, b)
Given the integer area of a box, calculate its edge lengths.
Definition: Parallelization_module.F90:311
parallelization_module::setup_process_grid
subroutine setup_process_grid(this, ngroup, sequential)
Initialize the process grid.
Definition: Parallelization_module.F90:100
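For orientation, a minimal usage sketch of this module follows. It is an illustration only: the names mpi_mod_start and mpi_mod_finalize are assumed stand-ins for whatever mpi_gbl routines start and shut down MPI (they must run before setup, which reads myrank, nprocs, local_rank, local_nprocs and shared_communicator), and the choice of two groups and ten work items is arbitrary.

    program process_grid_demo

       ! illustrative driver; mpi_mod_start / mpi_mod_finalize are assumed names for the
       ! mpi_gbl start-up and shut-down routines that populate myrank, nprocs, etc.
       use mpi_gbl,                only: mpi_mod_start, mpi_mod_finalize
       use parallelization_module, only: process_grid

       implicit none

       integer :: item

       call mpi_mod_start

       ! split the world into 2 groups; .false. requests concurrent (per-group) diagonalizations
       call process_grid % setup(2, .false.)
       call process_grid % summarize

       ! independent work items are distributed over the groups round-robin
       do item = 1, 10
          if (process_grid % is_my_group_work(item)) then
             ! ... diagonalize Hamiltonian block `item` using this group's BLACS context
             !     (process_grid % gcntxt) and group communicator (process_grid % gcomm) ...
          end if
       end do

       call mpi_mod_finalize

    end program process_grid_demo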