59 assert (N == y.
size());
66 for (
size_t i = 0; i < N; i++)
67 px[i] = a * px[i] + b * py[i];
144 class Preconditioner,
145 class MatrixMultiplication,
148 class ScalarProduct = decltype(
operator|<
Complex>),
156 unsigned min_iterations,
157 unsigned max_iterations,
158 Preconditioner apply_preconditioner,
159 MatrixMultiplication matrix_multiply,
170 double bnorm = compute_norm(b);
176 TArray p (std::move(new_array(N)));
177 TArray q (std::move(new_array(N)));
178 TArray z (std::move(new_array(N)));
181 TArray r (std::move(new_array(N)));
182 matrix_multiply(x, r);
183 axby (-1., r, 1., b);
184 double rnorm = compute_norm(r);
188 if (rnorm / bnorm > 1000)
202 for (k = 0; k < max_iterations; k++)
208 std::cout <<
"\t[cg] Residual relative magnitude after "
209 << k <<
" iterations: " << rnorm / bnorm
210 <<
" (" << sec / 60 <<
" min)\n";
214 apply_preconditioner(r, z);
226 beta = rho_new / rho_old;
227 axby (beta, p, 1, z);
231 matrix_multiply(p, q);
237 axby (1., x, alpha, p);
238 axby (1., r, -alpha, q);
241 rnorm = compute_norm(r);
242 if (not std::isfinite(rnorm))
244 std::cout <<
"\t[cg] Oh my god... the norm of the solution is not finite. Something went wrong!\n";
249 if (k >= min_iterations and rnorm / bnorm < eps)
276 template <
typename TFunctor1,
typename TFunctor2>
280 int min_iterations,
int max_iterations,
281 TFunctor1 apply_preconditioner,
282 TFunctor2 matrix_multiply,
285 std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
286 std::chrono::duration<int> sec;
289 double bnorm = b.
norm();
291 cArray x_im1(N), r_im1(N), rt(N), p_i(N), p_im1(N), v_im1(N), phat(N), v_i(N), s(N), shat(N), t(N), x_i(N), r_i(N);
292 Complex rho_im1, rho_im2, beta, alpha_i, alpha_im1, omega_i, omega_im1;
295 matrix_multiply(x_im1,r_im1);
296 rt = r_im1 = b - r_im1;
299 for (i = 1; i < max_iterations; i++)
301 sec = std::chrono::duration_cast<std::chrono::duration<int>>(std::chrono::steady_clock::now()-start);
305 std::cout <<
"\t[Bi-CGSTAB] Residual relative magnitude after "
306 << i <<
" iterations: " << r_im1.
norm() / bnorm
307 <<
" (" << sec.count()/60 <<
" min)\n";
310 rho_im1 = (rt | r_im1);
312 throw exception (
"[Bi-CGSTAB] Failed, rho = 0.");
320 beta = (rho_im1 / rho_im2) * (alpha_im1 / omega_im1);
321 p_i = r_im1 + beta * (p_im1 - omega_im1 * v_im1);
324 apply_preconditioner(p_i, phat);
325 matrix_multiply(phat, v_i);
326 alpha_i = rho_im1 / (rt | v_i);
327 s = r_im1 - alpha_i * v_i;
329 if (s.norm() < eps * bnorm)
331 x = x_im1 + alpha_i * phat;
335 apply_preconditioner(s, shat);
336 matrix_multiply(shat, t);
337 omega_i = (t|s) / (t|t);
339 x_i = x_im1 + alpha_i * phat + omega_i * s;
340 r_i = s - omega_i * t;
342 if (r_i.
norm() < eps * bnorm)
349 throw exception (
"[Bi-CGSTAB] Solver failed, ω = 0.");
352 x_im1 = std::move(x_i);
353 r_im1 = std::move(r_i);
354 p_im1 = std::move(p_i);
355 v_im1 = std::move(v_i);
383 template <
typename TFunctor1,
typename TFunctor2>
387 int min_iterations,
int max_iterations,
388 TFunctor1 apply_preconditioner,
389 TFunctor2 matrix_multiply,
392 std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
393 std::chrono::duration<int> sec;
396 double bnorm = b.
norm();
398 Complex resid, alpha, beta, rho_1, rho_2;
399 cArray r(N), rt(N), p(N), phat(N), q(N), qhat(N), vhat(N), u(N), uhat(N);
401 matrix_multiply(x,r);
407 if (r.norm() < eps * bnorm)
411 for (i = 1; i < max_iterations; i++)
413 sec = std::chrono::duration_cast<std::chrono::duration<int>>(std::chrono::steady_clock::now()-start);
417 std::cout <<
"\t[cgs] Residual relative magnitude after "
418 << i <<
" iterations: " << r.
norm() / bnorm
419 <<
" (" << sec.count()/60 <<
" min)\n";
426 throw exception (
"[cgs] Solver failes, ρ = 0.");
435 beta = rho_1 / rho_2;
437 p = u + beta * (q + beta * p);
440 apply_preconditioner(p, phat);
441 matrix_multiply(phat, vhat);
443 alpha = rho_1 / (rt | vhat);
444 q = u - alpha * vhat;
446 apply_preconditioner(u + q, uhat);
447 matrix_multiply(uhat, qhat);
453 if (r.norm() < eps * bnorm)
Array view.
Definition: arrays.h:186
cArray default_new_complex_array(size_t n)
Return new complex array.
Definition: itersolve.h:32
int bicgstab_callbacks(const cArrayView b, cArrayView x, double eps, int min_iterations, int max_iterations, TFunctor1 apply_preconditioner, TFunctor2 matrix_multiply, bool verbose=false)
Definition: itersolve.h:277
kernel void scalar_product(global double2 *u, global double2 *v, global double2 *z)
Full scalar product.
Definition: kernels.cl:97
int cgs_callbacks(const cArrayView b, cArrayView x, double eps, int min_iterations, int max_iterations, TFunctor1 apply_preconditioner, TFunctor2 matrix_multiply, bool verbose=false)
CGS solver.
Definition: itersolve.h:384
NumberArray< Complex > cArray
Definition: arrays.h:1610
A comfortable number array class.
Definition: arrays.h:171
#define restrict
Definition: misc.h:88
double norm() const
Compute usual 2-norm.
Definition: arrays.h:972
double norm() const
Two-norm (defined only for scalar data type).
Definition: arrays.h:304
size_t size() const
Length of the array (number of elements).
Definition: arrays.h:276
unsigned cg_callbacks(const TArrayView b, TArrayView x, double eps, unsigned min_iterations, unsigned max_iterations, Preconditioner apply_preconditioner, MatrixMultiplication matrix_multiply, bool verbose=true, NewArray new_array=default_new_complex_array, AxbyOperation axby=default_complex_axby, ScalarProduct scalar_product=operator|< Complex >, ComputeNorm compute_norm=default_compute_norm)
Conjugate gradients solver.
Definition: itersolve.h:152
double default_compute_norm(const cArrayView x)
Compute norm of an array.
Definition: itersolve.h:43
virtual T * data()
Pointer to the data.
Definition: arrays.h:280
rArray abs(const cArrayView u)
Definition: arrays.cpp:19
std::complex< double > Complex
Definition: complex.h:20
Timing class.
Definition: misc.h:360
unsigned seconds()
Return elapsed time in seconds.
Definition: misc.h:375
void default_complex_axby(Complex a, cArrayView x, Complex b, const cArrayView y)
Do the operation.
Definition: itersolve.h:56