MPI-SCATCI 2.0
An MPI version of SCATCI
Parallelization_module.F90
! Copyright 2019
!
! For a comprehensive list of the developers that contributed to these codes
! see the UK-AMOR website.
!
! This file is part of UKRmol-in (UKRmol+ suite).
!
! UKRmol-in is free software: you can redistribute it and/or modify
! it under the terms of the GNU General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version.
!
! UKRmol-in is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License
! along with UKRmol-in (in source/COPYING). Alternatively, you can also visit
! <https://www.gnu.org/licenses/>.

!> \brief Distribution of processes into a grid
!> \authors J Benda
!> \date 2019
!>
!> This module contains utility routines and types which aid with the "parallelization of parallelization", i.e.,
!> with concurrent distributed diagonalizations of Hamiltonians for multiple irreducible representations.
!>
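!> \par Example (illustrative sketch)
!> A minimal outline of the intended call sequence; the values of \c ngroup and
!> \c sequential below are arbitrary:
!> \code{.f90}
!>   call process_grid % setup(ngroup = 2, sequential = .false.)
!>   call process_grid % summarize
!> \endcode
!>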
module parallelization_module

#ifdef usempi
    use mpi
#endif
    use blas_lapack_gbl, only: blasint
    use mpi_gbl, only: mpiint, myrank, nprocs, local_rank, local_nprocs, shared_communicator

    implicit none

    !> \brief MPI process grid layout
    !> \authors J Benda
    !> \date 2019
    !>
    type :: processgrid
        integer(blasint) :: wcntxt !< BLACS context containing all MPI processes in the world communicator
        integer(blasint) :: wprows !< MPI world communicator grid row count
        integer(blasint) :: wpcols !< MPI world communicator grid column count
        integer(blasint) :: mywrow !< row position of this process within the MPI world communicator
        integer(blasint) :: mywcol !< column position of this process within the MPI world communicator

        integer :: igroup !< zero-based index of the MPI group this process belongs to
        integer :: ngroup !< total number of MPI groups partitioning the world communicator

        integer :: gprocs !< number of processes in the current MPI group
        integer :: grank  !< rank of this process within the MPI group
        integer :: lprocs !< number of processes of the current MPI group localized on a single node
        integer :: lrank  !< rank of this process within the local MPI group

        integer(blasint) :: gcntxt !< BLACS context containing all MPI processes in the MPI group communicator
        integer(blasint) :: gprows !< MPI group communicator grid row count
        integer(blasint) :: gpcols !< MPI group communicator grid column count
        integer(blasint) :: mygrow !< row position of this process within the MPI group communicator
        integer(blasint) :: mygcol !< column position of this process within the MPI group communicator

        integer(mpiint) :: gcomm !< MPI group communicator
        integer(mpiint) :: lcomm !< subset of the MPI group communicator localized on a single node

        integer, allocatable :: groupnprocs(:) !< number of processes per MPI group

        logical :: sequential !< whether the diagonalizations will be done sequentially (one after another) or not
    contains
        procedure, public, pass :: setup => setup_process_grid
        procedure, public, pass :: is_my_group_work
        procedure, public, pass :: which_group_is_work
        procedure, public, pass :: group_master_world_rank
        procedure, public, pass :: summarize
        procedure, private, nopass :: square
    end type processgrid

    ! global instance of the process grid
    type(processgrid) :: process_grid

contains

    !> \brief Initialize the process grid
    !> \authors J Benda
    !> \date 2019
    !>
    !> Splits the world communicator into the given number of MPI groups, and sets up all global group communicators
    !> and local group communicators (their single-node subsets). If there are more groups than processes, the setup
    !> falls back to the sequential mode with a single group spanning the whole MPI world. If the number of processes
    !> is not divisible by the number of groups, the leading mod(nprocs,ngroup) groups will contain one process more
    !> than the remaining groups.
    !>
    !> \param this       Process grid to set up.
    !> \param ngroup     Number of MPI groups to create.
    !> \param sequential If "true", all diagonalizations will be done in sequence (not concurrently, even if there are
    !>                   enough CPUs to create the requested groups). This is needed to have the eigenvectors written
    !>                   to disk, which does not happen with concurrent diagonalizations.
    !>
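    !> \par Example (illustrative)
    !> With 10 world processes and \c ngroup = 4, the round-robin distribution below gives
    !> groupnprocs = (/ 3, 3, 2, 2 /): world ranks 0-2 form group 0, ranks 3-5 group 1,
    !> ranks 6-7 group 2, and ranks 8-9 group 3.
    !>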
    subroutine setup_process_grid (this, ngroup, sequential)

        use const_gbl, only: stdout

        class(processgrid), intent(inout) :: this
        integer, intent(in) :: ngroup
        logical, intent(in) :: sequential

        integer, allocatable :: groupstarts(:), groupends(:)
        integer(mpiint) :: n, color, ierr
        integer(blasint) :: cntxt, rows, cols
        integer :: i, j, k, rank
        integer(blasint), allocatable :: groupmap(:,:)

        allocate (this % groupnprocs(ngroup), groupstarts(ngroup), groupends(ngroup))

        ! no context splitting needed if too few processes, or if concurrent diagonalizations are not desired
        this % sequential = (sequential .or. ngroup == 1 .or. nprocs < ngroup)

        ! default initialization
        this % gcomm = 0
        this % lcomm = shared_communicator
        this % igroup = 0
        this % ngroup = merge(1, ngroup, this % sequential)
        this % gprocs = nprocs
        this % grank = myrank
        this % lprocs = local_nprocs
        this % lrank = local_rank
        this % groupnprocs = nprocs

#ifdef usempi
# ifdef scalapack
        ! allocate BLACS context for the world
        call this % square(int(nprocs), this % wprows, this % wpcols)
        call blacs_get(-1_blasint, 0_blasint, this % wcntxt)
        call blacs_gridinit(this % wcntxt, 'r', this % wprows, this % wpcols)
        call blacs_gridinfo(this % wcntxt, this % wprows, this % wpcols, this % mywrow, this % mywcol)
# endif

        if (this % sequential) then
            this % gcntxt = this % wcntxt
            this % gcomm = mpi_comm_world
            this % groupnprocs = nprocs
            this % gprows = this % wprows
            this % gpcols = this % wpcols
            this % mygrow = this % mywrow
            this % mygcol = this % mywcol
            return
        end if

        ! spread processes among groups (on imbalance, leading groups end up with one process more)
        this % groupnprocs(:) = 0
        do i = 1, nprocs
            j = 1 + mod(i - 1, this % ngroup)
            this % groupnprocs(j) = this % groupnprocs(j) + 1
        end do
        groupstarts(1) = 0
        groupends(this % ngroup) = nprocs - 1
        do i = 1, this % ngroup - 1
            groupstarts(i + 1) = groupstarts(i) + this % groupnprocs(i)
            groupends(i) = groupstarts(i + 1) - 1
        end do

        ! find out which group this process belongs to (store zero-based index)
        do i = 1, this % ngroup
            if (groupstarts(i) <= myrank .and. myrank <= groupends(i)) then
                this % igroup = i - 1
                exit
            end if
        end do

        ! create an MPI group communicator and split it into sub-groups on individual nodes
        color = this % igroup
        call mpi_comm_split(mpi_comm_world, color, myrank, this % gcomm, ierr)
        call mpi_comm_split_type(this % gcomm, mpi_comm_type_shared, myrank, mpi_info_null, this % lcomm, ierr)
        call mpi_comm_rank(this % gcomm, n, ierr); this % grank = n
        call mpi_comm_size(this % gcomm, n, ierr); this % gprocs = n
        call mpi_comm_rank(this % lcomm, n, ierr); this % lrank = n
        call mpi_comm_size(this % lcomm, n, ierr); this % lprocs = n

# ifdef scalapack
        ! Allocate a BLACS context for the group. This has to be done separately for each group,
        ! not just once with a group-dependent content of `groupmap`, because `blacs_gridmap` uses
        ! `MPI_Group_incl`, which does not support creating several groups at a time.

        write (stdout, '(/," Creating BLACS groups")')
        write (stdout, '( " ---------------------")')

        do i = 1, this % ngroup

            ! populate the matrix of world ranks that belong to this group's grid (column-major)
            call this % square(this % groupnprocs(i), rows, cols)
            allocate (groupmap(rows, cols))
            rank = 0
            do k = 1, cols
                do j = 1, rows
                    groupmap(j, k) = groupstarts(i) + rank
                    rank = rank + 1
                end do
            end do

            ! split off a new BLACS context in a collective call
            call blacs_get(-1_blasint, 0_blasint, cntxt)
            write (stdout, '(" Group #",I0," size ",I0,"x",I0," grid ",*(1x,I0))') i, rows, cols, groupmap
            call blacs_gridmap(cntxt, groupmap, rows, rows, cols)
            deallocate (groupmap)

            ! if this group is mine, store the group information
            if (i == this % igroup + 1) then
                this % gprows = rows
                this % gpcols = cols
                this % gcntxt = cntxt
            end if

        end do

        ! find out this process's position within its BLACS grid
        call blacs_gridinfo(this % gcntxt, this % gprows, this % gpcols, this % mygrow, this % mygcol)
# endif
#endif

    end subroutine setup_process_grid


    !> \brief Write the current grid layout to stdout
    !> \authors J Benda
    !> \date 2019
    !>
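    !> \par Sample output (illustrative)
    !> For a run with 4 processes on one node, split into 2 groups, the routine would print
    !> along these lines (exact values depend on the run):
    !> \code
    !>  Process grid
    !>  ------------
    !>  Sequential diagonalizations: F
    !>  Number of groups: 2
    !>  Number of processes per group: 2 2
    !>  This process belongs to group: 0
    !>  This group size: 2
    !>  This group rank: 0
    !>  Shared-memory sub-group size: 2
    !>  Shared-memory sub-group rank: 0
    !> \endcode
    !>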
    subroutine summarize (this)

        use const_gbl, only: stdout

        class(processgrid), intent(in) :: this

        integer :: i

        write (stdout, *)
        write (stdout, '(1x,A)') 'Process grid'
        write (stdout, '(1x,A)') '------------'
        write (stdout, '(1x,A,L1)') 'Sequential diagonalizations: ', this % sequential
        write (stdout, '(1x,A,I0)') 'Number of groups: ', this % ngroup
        write (stdout, '(1x,A)', advance = 'no') 'Number of processes per group:'
        do i = 1, this % ngroup
            write (stdout, '(1x,I0)', advance = 'no') this % groupnprocs(i)
        end do
        write (stdout, *)
        write (stdout, '(1x,A,I0)') 'This process belongs to group: ', this % igroup
        write (stdout, '(1x,A,I0)') 'This group size: ', this % gprocs
        write (stdout, '(1x,A,I0)') 'This group rank: ', this % grank
        write (stdout, '(1x,A,I0)') 'Shared-memory sub-group size: ', this % lprocs
        write (stdout, '(1x,A,I0)') 'Shared-memory sub-group rank: ', this % lrank
        write (stdout, *)

    end subroutine summarize


    !> \brief Check whether this work-item is to be processed by this process's group
    !> \authors J Benda
    !> \date 2019
    !>
    !> The work-item index is expected to be greater than or equal to 1. Work-items with indices larger
    !> than the number of MPI groups wrap around.
    !>
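    !> \par Example (illustrative sketch)
    !> A typical round-robin dispatch loop; \c nirr and \c diagonalize_hamiltonian are
    !> hypothetical names standing for the caller's work-items, and surrounding
    !> declarations are omitted:
    !> \code{.f90}
    !>   do i = 1, nirr
    !>       ! only the group that owns work-item i performs this diagonalization
    !>       if (process_grid % is_my_group_work(i)) call diagonalize_hamiltonian(i)
    !>   end do
    !> \endcode
    !>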
    logical function is_my_group_work (this, i) result (verdict)

        class(processgrid), intent(inout) :: this
        integer, intent(in) :: i

        verdict = (this % which_group_is_work(i) == this % igroup)

    end function is_my_group_work


    !> \brief Find out which group a work-item will be processed by
    !> \authors J Benda
    !> \date 2019
    !>
    !> The work-item index is expected to be greater than or equal to 1. Work-items with indices larger
    !> than the number of MPI groups wrap around. Groups are numbered from 0.
    !>
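    !> \par Example (illustrative)
    !> With \c ngroup = 4, work-items 1, 2, 3, 4, 5, 6 map to groups 0, 1, 2, 3, 0, 1.
    !>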
    integer function which_group_is_work (this, i) result (igroup)

        class(processgrid), intent(inout) :: this
        integer, intent(in) :: i

        igroup = mod(i - 1, this % ngroup)

    end function which_group_is_work


    !> \brief Find out the world rank of the master process of a given MPI group
    !> \authors J Benda
    !> \date 2019
    !>
    !> The master of a group is its first process, so its world rank equals the total number
    !> of processes in all preceding groups. The group index `igroup` is zero-based.
    !>
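    !> \par Example (illustrative)
    !> With groupnprocs = (/ 3, 3, 2, 2 /), the group masters are world ranks
    !> 0, 3, 6 and 8 for igroup = 0, 1, 2 and 3, respectively.
    !>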
    integer function group_master_world_rank (this, igroup) result (rank)

        class(processgrid), intent(inout) :: this
        integer, intent(in) :: igroup

        rank = sum(this % groupnprocs(1:igroup))

    end function group_master_world_rank


    !> \brief Given the integer area of a box, calculate its edge lengths
    !> \authors A Al-Refaie, J Benda
    !> \date 2017 - 2019
    !>
    !> Return positive `a` and `b` such that their product is exactly equal to `n` and the difference
    !> between `a` and `b` is as small as possible. On return, `a` is always less than or equal to `b`.
    !>
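    !> \par Example (illustrative)
    !> For n = 12 the divisor pairs scanned below are (1,12), (2,6) and (3,4); the last and
    !> most balanced pair wins, so the routine returns a = 3 and b = 4.
    !>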
    subroutine square (n, a, b)

        integer, intent(in) :: n
        integer(blasint), intent(out) :: a, b

        integer :: i

        ! scan candidate divisors; the last (largest) divisor not exceeding sqrt(n) wins,
        ! giving the most balanced factorization; the guard i * i <= n keeps the documented
        ! ordering a <= b even when the loop bound overshoots sqrt(n)
        do i = 1, int(sqrt(real(n)) + 1)
            if (mod(n, i) == 0 .and. i * i <= n) then
                a = i
                b = n / i
            end if
        end do

    end subroutine square

end module parallelization_module