Hex  1.0
Hydrogen-electron collision solver
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
itersolve.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
2  * *
3  * / / / / __ \ \ / / *
4  * / /__ / / / _ \ \ \/ / *
5  * / ___ / | |/_/ / /\ \ *
6  * / / / / \_\ / / \ \ *
7  * *
8  * Jakub Benda (c) 2014 *
9  * Charles University in Prague *
10  * *
11 \* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
12 
13 #ifndef HEX_ITERSOLVE
14 #define HEX_ITERSOLVE
15 
16 #include <chrono>
17 #include <iostream>
18 
19 #include "arrays.h"
20 #include "complex.h"
21 #include "matrix.h"
22 #include "misc.h"
23 
33 {
34  return cArray(n);
35 }
36 
43 inline double default_compute_norm (const cArrayView x)
44 {
45  return x.norm();
46 }
47 
57 {
58  size_t N = x.size();
59  assert (N == y.size());
60 
61  // accelerators
62  Complex * const restrict px = x.data();
63  Complex const * const restrict py = y.data();
64 
65  // do the axby per element
66  for (size_t i = 0; i < N; i++)
67  px[i] = a * px[i] + b * py[i];
68 }
69 
140 template
141 <
142  class TArray,
143  class TArrayView,
144  class Preconditioner,
145  class MatrixMultiplication,
146  class NewArray = decltype(default_new_complex_array),
147  class AxbyOperation = decltype(default_complex_axby),
148  class ScalarProduct = decltype(operator|<Complex>),
149  class ComputeNorm = decltype(default_compute_norm)
150 >
151 unsigned cg_callbacks
152 (
153  const TArrayView b,
154  TArrayView x,
155  double eps,
156  unsigned min_iterations,
157  unsigned max_iterations,
158  Preconditioner apply_preconditioner,
159  MatrixMultiplication matrix_multiply,
160  bool verbose = true,
161  NewArray new_array = default_new_complex_array,
162  AxbyOperation axby = default_complex_axby,
163  ScalarProduct scalar_product = operator|<Complex>,
164  ComputeNorm compute_norm = default_compute_norm
165 )
166 {
167  Timer timer;
168 
169  // compute norm of the right hand side
170  double bnorm = compute_norm(b);
171 
172  // get size of the problem
173  size_t N = b.size();
174 
175  // some auxiliary arrays (search directions etc.)
176  TArray p (std::move(new_array(N)));
177  TArray q (std::move(new_array(N)));
178  TArray z (std::move(new_array(N)));
179 
180  // residual; initialized to starting residual using the initial guess
181  TArray r (std::move(new_array(N)));
182  matrix_multiply(x, r); // r = A x
183  axby (-1., r, 1., b); // r = b - r
184  double rnorm = compute_norm(r);
185 
186  // if the (non-zero) initial guess seems horribly wrong,
187  // use rather the right hand side as the initial guess
188  if (rnorm / bnorm > 1000)
189  {
190  x.fill(0.);
191  axby (0., r, 1., b); // r = b
192  }
193 
194  // some other scalar variables
195  Complex rho_new; // contains inner product r_i^T · r_i
196  Complex rho_old; // contains inner product r_{i-1}^T · r_{i-1}
197  Complex alpha, beta; // contains projection ratios
198 
199  // Iterate
200 
201  unsigned k;
202  for (k = 0; k < max_iterations; k++)
203  {
204  int sec = timer.seconds();
205 
206  if (verbose)
207  {
208  std::cout << "\t[cg] Residual relative magnitude after "
209  << k << " iterations: " << rnorm / bnorm
210  << " (" << sec / 60 << " min)\n";
211  }
212 
213  // apply desired preconditioner
214  apply_preconditioner(r, z); // z = M⁻¹r
215 
216  // compute projection ρ = r·z
217  rho_new = scalar_product(r, z);
218 
219  // setup search direction p
220  if (k == 0)
221  {
222  axby (0., p, 1., z); // p = z
223  }
224  else
225  {
226  beta = rho_new / rho_old;
227  axby (beta, p, 1, z); // p = beta p + z
228  }
229 
230  // move to next Krylov subspace by multiplying A·p
231  matrix_multiply(p, q);
232 
233  // compute projection ratio α
234  alpha = rho_new / scalar_product(p, q);
235 
236  // update the solution and the residual
237  axby (1., x, alpha, p); // x = x + α p
238  axby (1., r, -alpha, q); // r = r - α q
239 
240  // compute and check norm
241  rnorm = compute_norm(r);
242  if (not std::isfinite(rnorm))
243  {
244  std::cout << "\t[cg] Oh my god... the norm of the solution is not finite. Something went wrong!\n";
245  break;
246  }
247 
248  // check convergence, but do at least "min_iterations" iterations
249  if (k >= min_iterations and rnorm / bnorm < eps)
250  break;
251 
252  // move to the next iteration: store previous projection
253  rho_old = rho_new;
254  }
255 
256  return k;
257 }
258 
276 template <typename TFunctor1, typename TFunctor2>
278  const cArrayView b, cArrayView x,
279  double eps,
280  int min_iterations, int max_iterations,
281  TFunctor1 apply_preconditioner,
282  TFunctor2 matrix_multiply,
283  bool verbose = false
284 ) {
285  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
286  std::chrono::duration<int> sec;
287 
288  int N = b.size();
289  double bnorm = b.norm();
290 
291  cArray x_im1(N), r_im1(N), rt(N), p_i(N), p_im1(N), v_im1(N), phat(N), v_i(N), s(N), shat(N), t(N), x_i(N), r_i(N);
292  Complex rho_im1, rho_im2, beta, alpha_i, alpha_im1, omega_i, omega_im1;
293 
294  x_im1 = x;
295  matrix_multiply(x_im1,r_im1);
296  rt = r_im1 = b - r_im1;
297 
298  int i;
299  for (i = 1; i < max_iterations; i++)
300  {
301  sec = std::chrono::duration_cast<std::chrono::duration<int>>(std::chrono::steady_clock::now()-start);
302 
303  if (verbose)
304  {
305  std::cout << "\t[Bi-CGSTAB] Residual relative magnitude after "
306  << i << " iterations: " << r_im1.norm() / bnorm
307  << " (" << sec.count()/60 << " min)\n";
308  }
309 
310  rho_im1 = (rt | r_im1);
311  if (std::abs(rho_im1) == 0.)
312  throw exception ("[Bi-CGSTAB] Failed, rho = 0.");
313 
314  if (i == 1)
315  {
316  p_i = r_im1;
317  }
318  else
319  {
320  beta = (rho_im1 / rho_im2) * (alpha_im1 / omega_im1);
321  p_i = r_im1 + beta * (p_im1 - omega_im1 * v_im1);
322  }
323 
324  apply_preconditioner(p_i, phat);
325  matrix_multiply(phat, v_i);
326  alpha_i = rho_im1 / (rt | v_i);
327  s = r_im1 - alpha_i * v_i;
328 
329  if (s.norm() < eps * bnorm)
330  {
331  x = x_im1 + alpha_i * phat;
332  break;
333  }
334 
335  apply_preconditioner(s, shat);
336  matrix_multiply(shat, t);
337  omega_i = (t|s) / (t|t);
338 
339  x_i = x_im1 + alpha_i * phat + omega_i * s;
340  r_i = s - omega_i * t;
341 
342  if (r_i.norm() < eps * bnorm)
343  {
344  x = x_i;
345  break;
346  }
347 
348  if (omega_i == 0.)
349  throw exception ("[Bi-CGSTAB] Solver failed, ω = 0.");
350 
351  // shift vectors
352  x_im1 = std::move(x_i);
353  r_im1 = std::move(r_i);
354  p_im1 = std::move(p_i);
355  v_im1 = std::move(v_i);
356 
357  // shift
358  rho_im2 = rho_im1;
359  alpha_im1 = alpha_i;
360  omega_im1 = omega_i;
361  }
362 
363  return i;
364 }
365 
383 template <typename TFunctor1, typename TFunctor2>
385  const cArrayView b, cArrayView x,
386  double eps,
387  int min_iterations, int max_iterations,
388  TFunctor1 apply_preconditioner,
389  TFunctor2 matrix_multiply,
390  bool verbose = false
391 ) {
392  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
393  std::chrono::duration<int> sec;
394 
395  int N = b.size();
396  double bnorm = b.norm();
397 
398  Complex resid, alpha, beta, rho_1, rho_2;
399  cArray r(N), rt(N), p(N), phat(N), q(N), qhat(N), vhat(N), u(N), uhat(N);
400 
401  matrix_multiply(x,r);
402  rt = r = b - r;
403 
404  if (bnorm == 0.)
405  bnorm = 1;
406 
407  if (r.norm() < eps * bnorm)
408  return 0;
409 
410  int i;
411  for (i = 1; i < max_iterations; i++)
412  {
413  sec = std::chrono::duration_cast<std::chrono::duration<int>>(std::chrono::steady_clock::now()-start);
414 
415  if (verbose)
416  {
417  std::cout << "\t[cgs] Residual relative magnitude after "
418  << i << " iterations: " << r.norm() / bnorm
419  << " (" << sec.count()/60 << " min)\n";
420  }
421 
422  rho_1 = (rt | r);
423 
424  if (rho_1 == 0.)
425  {
426  throw exception ("[cgs] Solver failes, ρ = 0.");
427  }
428  if (i == 1)
429  {
430  u = r;
431  p = u;
432  }
433  else
434  {
435  beta = rho_1 / rho_2;
436  u = r + beta * q;
437  p = u + beta * (q + beta * p);
438  }
439 
440  apply_preconditioner(p, phat);
441  matrix_multiply(phat, vhat);
442 
443  alpha = rho_1 / (rt | vhat);
444  q = u - alpha * vhat;
445 
446  apply_preconditioner(u + q, uhat);
447  matrix_multiply(uhat, qhat);
448 
449  x += alpha * uhat;
450  r -= alpha * qhat;
451  rho_2 = rho_1;
452 
453  if (r.norm() < eps * bnorm)
454  break;
455  }
456 
457  return i;
458 }
459 
460 #endif
Array view.
Definition: arrays.h:186
cArray default_new_complex_array(size_t n)
Return new complex array.
Definition: itersolve.h:32
int bicgstab_callbacks(const cArrayView b, cArrayView x, double eps, int min_iterations, int max_iterations, TFunctor1 apply_preconditioner, TFunctor2 matrix_multiply, bool verbose=false)
Definition: itersolve.h:277
kernel void scalar_product(global double2 *u, global double2 *v, global double2 *z)
Full scalar product.
Definition: kernels.cl:97
int cgs_callbacks(const cArrayView b, cArrayView x, double eps, int min_iterations, int max_iterations, TFunctor1 apply_preconditioner, TFunctor2 matrix_multiply, bool verbose=false)
CGS solver.
Definition: itersolve.h:384
NumberArray< Complex > cArray
Definition: arrays.h:1610
A comfortable number array class.
Definition: arrays.h:171
#define restrict
Definition: misc.h:88
double norm() const
Compute usual 2-norm.
Definition: arrays.h:972
double norm() const
Two-norm (defined only for scalar data type).
Definition: arrays.h:304
size_t size() const
Length of the array (number of elements).
Definition: arrays.h:276
unsigned cg_callbacks(const TArrayView b, TArrayView x, double eps, unsigned min_iterations, unsigned max_iterations, Preconditioner apply_preconditioner, MatrixMultiplication matrix_multiply, bool verbose=true, NewArray new_array=default_new_complex_array, AxbyOperation axby=default_complex_axby, ScalarProduct scalar_product=operator|< Complex >, ComputeNorm compute_norm=default_compute_norm)
Conjugate gradients solver.
Definition: itersolve.h:152
CLArrayView exception
double default_compute_norm(const cArrayView x)
Compute norm of an array.
Definition: itersolve.h:43
virtual T * data()
Pointer to the data.
Definition: arrays.h:280
rArray abs(const cArrayView u)
Definition: arrays.cpp:19
std::complex< double > Complex
Definition: complex.h:20
Timing class.
Definition: misc.h:360
unsigned seconds()
Return elapsed time in seconds.
Definition: misc.h:375
void default_complex_axby(Complex a, cArrayView x, Complex b, const cArrayView y)
Do the operation.
Definition: itersolve.h:56