57 type,
abstract,
extends(basematrix) :: distributedmatrix
59 type(matrixcache) :: temp_cache
61 real(wp) :: memory_scale = 0.75_wp
63 integer :: continuum_counter
65 integer :: start_continuum_update
66 integer :: start_l2_update
70 procedure,
public :: print => print_distributed
71 procedure,
public :: update_continuum => update_continuum_distributed
72 procedure,
public :: update_pure_l2 => update_l2_distributed
74 procedure,
public :: initialize_struct_self => initialize_struct_distributed
76 procedure,
public :: setup_diag_matrix
78 procedure,
public :: construct_self => construct_mat_distributed
79 procedure,
public :: insert_matelem_self => insert_matelem_distributed
80 procedure,
public :: get_matelem_self => get_matelem_distributed
81 procedure,
public :: clear_self => clear_distributed
82 procedure,
public :: destroy_self => destroy_distributed
83 procedure,
public :: finalize_matrix => finalize_distributed
84 procedure,
public :: finalize_matrix_self
85 procedure,
public :: destroy_matrix
86 procedure,
public :: clear_matrix
88 procedure,
public :: insert_into_diag_matrix
89 procedure,
private :: convert_temp_cache_to_array
90 procedure,
private :: update_counter
109 subroutine initialize_struct_distributed (this, matrix_size, matrix_type, block_size)
110 class(distributedmatrix) :: this
111 integer,
intent(in) :: matrix_size, matrix_type, block_size
114 call this % temp_cache % clear
116 this % memory_scale = 0.75_wp
117 call this % setup_diag_matrix(matrix_size, matrix_type, block_size)
119 this % continuum_counter = 0
120 this % L2_counter = 0
126 call this % update_counter
131 subroutine insert_matelem_distributed (this, i, j, coefficient, class, thresh)
132 class(distributedmatrix) :: this
133 integer,
intent(in) :: i, j, class
134 real(wp),
intent(in) :: coefficient, thresh
136 integer :: row, column
138 if (nprocs <= 1)
then
139 dummy = this % insert_into_diag_matrix(i, j, coefficient)
143 if (
class /= 8 .and. class /= 2 .and. this % matrix_type /=
mat_dense)
then
144 call this % temp_cache % insert_into_cache(i, j, coefficient)
147 if (abs(coefficient) < thresh)
return
150 if (this % insert_into_diag_matrix(i, j, coefficient))
return
153 call this % temp_cache % insert_into_cache(i, j, coefficient)
191 subroutine update_continuum_distributed (this, force_update)
192 class(distributedmatrix) :: this
193 logical,
intent(in) :: force_update
195 real(wp),
allocatable :: matrix_coeffs(:)
198 integer :: number_of_chunks, num_elements, nelms_chunk, num_elems, i, j, ido, jdo, ierr, error, length, temp
201 if (this % temp_cache % is_empty())
return
203 if (nprocs <= 1)
return
205 this % continuum_counter = this % continuum_counter + 1
207 if (this % continuum_counter < this % start_continuum_update .and. .not. force_update)
return
209 this % continuum_counter = 0
211 call master_timer % start_timer(
"Update Continuum")
216 number_of_chunks = this % temp_cache % num_matrix_chunks
218 write (stdout,
"('Number of elements to reduce = ',i8)") this % temp_cache % get_size()
222 do ido = 1, number_of_chunks
224 nelms_chunk = this % temp_cache % matrix_arrays(ido) % num_elems
225 allocate(matrix_coeffs(nelms_chunk))
231 call mpi_reduceall_sum_cfp(this % temp_cache % matrix_arrays(ido) % coefficient(1:nelms_chunk), &
232 matrix_coeffs, nelms_chunk, grid % gcomm)
234 this % temp_cache % matrix_arrays(ido) % coefficient(1:nelms_chunk) = matrix_coeffs(1:nelms_chunk)
236 deallocate(matrix_coeffs)
239 num_elems = this % temp_cache % get_size()
242 do ido = 1, num_elems
243 call this % temp_cache % get_from_cache(ido, i, j, coeff)
244 dummy = this % insert_into_diag_matrix(i, j, coeff)
248 call this % temp_cache % clear_and_shrink
249 call master_timer % stop_timer(
"Update Continuum")
250 call this % update_counter
255 subroutine convert_temp_cache_to_array (this, matrix_ij, matrix_coeffs)
257 class(distributedmatrix) :: this
258 integer(longint),
intent(inout) :: matrix_ij(:,:)
259 real(wp),
intent(inout) :: matrix_coeffs(:)
263 write (stdout,
"('Converting cache to array')")
264 do ido = 1, this % temp_cache % n
265 call this % temp_cache % get_from_cache(ido, i, j, matrix_coeffs(ido))
266 matrix_ij(ido, 1) = i
267 matrix_ij(ido, 2) = j
269 write (stdout,
"('done')")
270 call this % temp_cache % clear_and_shrink
275 subroutine update_l2_distributed (this, force_update, count_)
276 class(distributedmatrix) :: this
277 logical,
intent(in) :: force_update
278 real(wp),
allocatable :: matrix_coeffs(:)
279 integer,
optional :: count_
281 integer(longint),
allocatable,
target :: matrix_ij(:,:)
282 integer(longint),
pointer :: mat_ptr(:)
283 integer(longint) :: my_num_of_elements, procs_num_of_elements, largest_num_of_elems
285 integer :: count_amount, ido, proc_id, i, j, ierr
291 if (
present(count_)) count_amount = count_
293 this % L2_counter = this % L2_counter + count_amount
295 if (this % L2_counter < this % start_L2_update .and. .not. force_update)
return
297 my_num_of_elements = this % temp_cache % get_size()
299 if (nprocs <= 1)
then
300 do ido = 1, my_num_of_elements
301 call this % temp_cache % get_from_cache(ido, i, j, coeff)
302 dummy = this % insert_into_diag_matrix(i, j, coeff)
305 call this % temp_cache % clear
309 call mpi_reduceall_max(my_num_of_elements, largest_num_of_elems, grid % gcomm)
311 if (largest_num_of_elems < this % start_L2_update .and. .not. force_update)
then
312 this % L2_counter = largest_num_of_elems
316 this % L2_counter = 0
318 call this % temp_cache % shrink_capacity
323 write (stdout,
"('The largest number of elements is ',i10,' mine is ',i10)") largest_num_of_elems, my_num_of_elements
327 if (largest_num_of_elems == 0)
return
328 write (stdout,
"('Starting L2 update')")
331 call master_timer % start_timer(
"Update L2")
332 allocate(matrix_ij(largest_num_of_elems, 2), matrix_coeffs(largest_num_of_elems), stat = ierr)
334 call master_memory % track_memory(storage_size(matrix_ij)/8,
size(matrix_ij), ierr,
"DIST::L2UPDATE::MAT_IJ")
335 call master_memory % track_memory(storage_size(matrix_coeffs)/8,
size(matrix_coeffs), ierr,
"DIST::L2UPDATE::MAT_COEFFS")
338 write (stdout,
"('Memory allocation error during update!')")
339 stop
"Memory allocation error"
345 mat_ptr(1:largest_num_of_elems*2) => matrix_ij(:,:)
347 call this % convert_temp_cache_to_array(matrix_ij(:,:), matrix_coeffs(:))
348 call this % temp_cache%clear_and_shrink
350 procs_num_of_elements = my_num_of_elements
353 do proc_id = 1, grid % gprows * grid % gpcols - 1
354 call mpi_mod_rotate_arrays_around_ring(procs_num_of_elements, mat_ptr, &
355 matrix_coeffs, largest_num_of_elems, grid % gcomm)
356 do ido = 1, procs_num_of_elements
357 dummy = this % insert_into_diag_matrix(int(matrix_ij(ido,1)), int(matrix_ij(ido,2)), matrix_coeffs(ido))
361 call master_memory % free_memory(storage_size(matrix_ij)/8,
size(matrix_ij))
362 call master_memory % free_memory(storage_size(matrix_coeffs)/8,
size(matrix_coeffs))
365 deallocate(matrix_ij,matrix_coeffs)
368 call this % temp_cache % clear_and_shrink
369 call master_timer % stop_timer(
"Update L2")
371 write (stdout,
"('Finished L2 update')")
374 call this % update_counter
432 subroutine update_counter (this)
433 class(distributedmatrix) :: this
434 integer :: ifail, per_elm, dummy_int, c_update, l_update
435 real(wp) :: dummy_real
437 this % continuum_counter = 0
438 this % L2_counter = 0
444 per_elm = kind(dummy_int) * 2 + kind(dummy_real) + 4
446 c_update = master_memory % get_scaled_available_memory(this % memory_scale) / (per_elm * 2)
447 l_update = master_memory % get_scaled_available_memory(this % memory_scale) / (per_elm * 2)
449 call mpi_reduceall_min(c_update, this % start_continuum_update, grid % gcomm)
450 call mpi_reduceall_min(l_update, this % start_L2_update, grid % gcomm)
452 call master_memory % print_memory_report
454 write (stdout,
"(2i16,' updates will occur at continuum = ',i12,' and L2 = ',i12)") &
455 c_update, l_update, this % start_continuum_update, this % start_L2_update