34 use const_gbl,
only: stdout
36 use precisn,
only: longint, wp
37 use mpi_gbl,
only: nprocs, mpi_xermsg, mpi_reduceall_min, mpi_reduceall_max, &
38 mpi_mod_rotate_arrays_around_ring, mpi_reduceall_inplace_sum_cfp, mpi_reduceall_sum_cfp
61 real(wp) :: memory_scale = 0.75_wp
63 integer :: continuum_counter
65 integer :: start_continuum_update
66 integer :: start_l2_update
98 write (stdout,
"('Constructing Distributed matrix')")
101 call this % temp_cache % construct
104 call this % temp_cache % clear
111 integer,
intent(in) :: matrix_size, matrix_type, block_size
114 call this % temp_cache % clear
116 this % memory_scale = 0.75_wp
117 call this % setup_diag_matrix(matrix_size, matrix_type, block_size)
119 this % continuum_counter = 0
120 this % L2_counter = 0
126 call this % update_counter
133 integer,
intent(in) :: i, j, class
134 real(wp),
intent(in) :: coefficient, thresh
136 integer :: row, column
138 if (nprocs <= 1)
then
139 dummy = this % insert_into_diag_matrix(i, j, coefficient)
143 if (
class /= 8 .and. class /= 2 .and. this % matrix_type /=
mat_dense)
then
144 call this % temp_cache % insert_into_cache(i, j, coefficient)
147 if (abs(coefficient) < thresh)
return
150 if (this % insert_into_diag_matrix(i, j, coefficient))
return
153 call this % temp_cache % insert_into_cache(i, j, coefficient)
161 integer,
intent(in) :: idx
162 integer,
intent(out) :: i, j
163 real(wp),
intent(out) :: coeff
174 integer,
intent(in) :: matrix_size, matrix_type, block_size
182 integer,
intent(in) :: row, column
183 real(wp),
intent(in) :: coefficient
186 call mpi_xermsg(
'DistributedMatrix_module',
'insert_into_diag_matrix',
'Not implemented', 1, 1)
193 logical,
intent(in) :: force_update
195 real(wp),
allocatable :: matrix_coeffs(:)
198 integer :: number_of_chunks, num_elements, nelms_chunk, num_elems, i, j, ido, jdo, ierr, error, length, temp
201 if (this % temp_cache % is_empty())
return
203 if (nprocs <= 1)
return
205 this % continuum_counter = this % continuum_counter + 1
207 if (this % continuum_counter < this % start_continuum_update .and. .not. force_update)
return
209 this % continuum_counter = 0
216 number_of_chunks = this % temp_cache % num_matrix_chunks
218 write (stdout,
"('Number of elements to reduce = ',i8)") this % temp_cache % get_size()
222 do ido = 1, number_of_chunks
224 nelms_chunk = this % temp_cache % matrix_arrays(ido) % num_elems
225 allocate(matrix_coeffs(nelms_chunk))
231 call mpi_reduceall_sum_cfp(this % temp_cache % matrix_arrays(ido) % coefficient(1:nelms_chunk), &
232 matrix_coeffs, nelms_chunk, grid % gcomm)
234 this % temp_cache % matrix_arrays(ido) % coefficient(1:nelms_chunk) = matrix_coeffs(1:nelms_chunk)
236 deallocate(matrix_coeffs)
239 num_elems = this % temp_cache % get_size()
242 do ido = 1, num_elems
243 call this % temp_cache % get_from_cache(ido, i, j, coeff)
244 dummy = this % insert_into_diag_matrix(i, j, coeff)
248 call this % temp_cache % clear_and_shrink
250 call this % update_counter
258 integer(longint),
intent(inout) :: matrix_ij(:,:)
259 real(wp),
intent(inout) :: matrix_coeffs(:)
263 write (stdout,
"('Converting cache to array')")
264 do ido = 1, this % temp_cache % n
265 call this % temp_cache % get_from_cache(ido, i, j, matrix_coeffs(ido))
266 matrix_ij(ido, 1) = i
267 matrix_ij(ido, 2) = j
269 write (stdout,
"('done')")
270 call this % temp_cache % clear_and_shrink
277 logical,
intent(in) :: force_update
278 real(wp),
allocatable :: matrix_coeffs(:)
279 integer,
optional :: count_
281 integer(longint),
allocatable,
target :: matrix_ij(:,:)
282 integer(longint),
pointer :: mat_ptr(:)
283 integer(longint) :: my_num_of_elements, procs_num_of_elements, largest_num_of_elems
285 integer :: count_amount, ido, proc_id, i, j, ierr
291 if (
present(count_)) count_amount = count_
293 this % L2_counter = this % L2_counter + count_amount
295 if (this % L2_counter < this % start_L2_update .and. .not. force_update)
return
297 my_num_of_elements = this % temp_cache % get_size()
299 if (nprocs <= 1)
then
300 do ido = 1, my_num_of_elements
301 call this % temp_cache % get_from_cache(ido, i, j, coeff)
302 dummy = this % insert_into_diag_matrix(i, j, coeff)
305 call this % temp_cache % clear
309 call mpi_reduceall_max(my_num_of_elements, largest_num_of_elems, grid % gcomm)
311 if (largest_num_of_elems < this % start_L2_update .and. .not. force_update)
then
312 this % L2_counter = largest_num_of_elems
316 this % L2_counter = 0
318 call this % temp_cache % shrink_capacity
323 write (stdout,
"('The largest number of elements is ',i10,' mine is ',i10)") largest_num_of_elems, my_num_of_elements
327 if (largest_num_of_elems == 0)
return
328 write (stdout,
"('Starting L2 update')")
332 allocate(matrix_ij(largest_num_of_elems, 2), matrix_coeffs(largest_num_of_elems), stat = ierr)
334 call master_memory % track_memory(kind(matrix_ij),
size(matrix_ij), ierr,
"DIST::L2UPDATE::MAT_IJ")
335 call master_memory % track_memory(kind(matrix_coeffs),
size(matrix_coeffs), ierr,
"DIST::L2UPDATE::MAT_COEFFS")
338 write (stdout,
"('Memory allocation error during update!')")
339 stop
"Memory allocation error"
345 mat_ptr(1:largest_num_of_elems*2) => matrix_ij(:,:)
347 call this % convert_temp_cache_to_array(matrix_ij(:,:), matrix_coeffs(:))
348 call this % temp_cache%clear_and_shrink
350 procs_num_of_elements = my_num_of_elements
353 do proc_id = 1, grid % gprows * grid % gpcols - 1
354 call mpi_mod_rotate_arrays_around_ring(procs_num_of_elements, mat_ptr, &
355 matrix_coeffs, largest_num_of_elems, grid % gcomm)
356 do ido = 1, procs_num_of_elements
357 dummy = this % insert_into_diag_matrix(int(matrix_ij(ido,1)), int(matrix_ij(ido,2)), matrix_coeffs(ido))
361 call master_memory % free_memory(kind(matrix_ij),
size(matrix_ij))
362 call master_memory % free_memory(kind(matrix_coeffs),
size(matrix_coeffs))
365 deallocate(matrix_ij,matrix_coeffs)
368 call this % temp_cache % clear_and_shrink
371 write (stdout,
"('Finished L2 update')")
374 call this % update_counter
392 call mpi_reduceall_inplace_sum_cfp(this % diagonal, this % matrix_dimension, grid % gcomm)
393 call this % finalize_matrix_self
406 write (stdout,
"('-------TEMP CACHE---------')")
408 call this % temp_cache % print
416 call this % temp_cache % clear_and_shrink
417 call this % clear_matrix
426 call this % temp_cache % destroy
427 call this % destroy_matrix
434 integer :: ifail, per_elm, dummy_int, c_update, l_update
435 real(wp) :: dummy_real
437 this % continuum_counter = 0
438 this % L2_counter = 0
444 per_elm = kind(dummy_int) * 2 + kind(dummy_real) + 4
446 c_update =
master_memory % get_scaled_available_memory(this % memory_scale) / (per_elm * 2)
447 l_update =
master_memory % get_scaled_available_memory(this % memory_scale) / (per_elm * 2)
449 call mpi_reduceall_min(c_update, this % start_continuum_update, grid % gcomm)
450 call mpi_reduceall_min(l_update, this % start_L2_update, grid % gcomm)
454 write (stdout,
"(2i16,' updates will occur at continuum = ',i12,' and L2 = ',i12)") &
455 c_update, l_update, this % start_continuum_update, this % start_L2_update