hex/hex-ecs/itersolve_8h_source.html

 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\

  *                                                                           *

  *                       / /   / /    __    \ \  / /                         *

  *                      / /__ / /   / _ \    \ \/ /                          *

  *                     /  ___  /   | |/_/    / /\ \                          *

  *                    / /   / /    \_\      / /  \ \                         *

  *                                                                           *

  *                         Jakub Benda (c) 2014                              *

  *                     Charles University in Prague                          *

  *                                                                           *

 \* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


 #ifndef HEX_ITERSOLVE

 #define HEX_ITERSOLVE


 #include <chrono>
 #include <iostream>
 #include <utility>

 #include "arrays.h"
 #include "complex.h"
 #include "matrix.h"
 #include "misc.h"


 /**
  * @brief Return new complex array.
  *
  * Default array factory of cg_callbacks. Builds a cArray from the
  * requested element count and hands it back by value.
  *
  * @param n Element count passed to the cArray constructor.
  * @return The freshly constructed array.
  */
 inline cArray default_new_complex_array (size_t n)
 {
     cArray fresh (n);
     return fresh;
 }


 /**
  * @brief Compute norm of an array.
  *
  * Default norm callback of cg_callbacks; simply forwards to the
  * norm() member of the supplied view.
  *
  * @param x View of the array whose norm is requested.
  * @return Whatever x.norm() yields.
  */
 inline double default_compute_norm (const cArrayView x)
 {
     const double magnitude = x.norm();
     return magnitude;
 }


 /**
  * @brief In-place linear combination x := a*x + b*y.
  *
  * Default "axby" callback of cg_callbacks. Every element of @c x is
  * overwritten by a times itself plus b times the matching element of @c y.
  *
  * @param a Scale factor applied to the elements of x.
  * @param x Array that is updated in place.
  * @param b Scale factor applied to the elements of y.
  * @param y Array that is only read; must have as many elements as x
  *          (checked by an assert).
  */
 inline void default_complex_axby (Complex a, cArrayView x, Complex b, const cArrayView y)
 {
     const size_t count = x.size();
     assert (count == y.size());

     // raw pointers to the underlying storage
     Complex       * const restrict dst = x.data();
     Complex const * const restrict src = y.data();

     // element-wise update
     size_t k = 0;
     while (k < count)
     {
         dst[k] = a * dst[k] + b * src[k];
         ++k;
     }
 }


 /**
  * @brief Conjugate gradients solver.
  *
  * Preconditioned conjugate-gradient iteration for A·x = b in which every
  * linear-algebra primitive is supplied by the caller as a callback.
  * The solution is refined in place in @c x, whose initial content is
  * used as the starting guess.
  *
  * @param b Right-hand side.
  * @param x Initial guess on entry; updated in place with the iterate.
  * @param eps Relative tolerance: the iteration stops once
  *            |r| / |b| < eps (after at least @c min_iterations passes).
  * @param min_iterations The convergence test is skipped while the
  *            zero-based iteration index is below this value.
  * @param max_iterations Upper bound on the number of iterations.
  * @param apply_preconditioner Called as apply_preconditioner(r, z);
  *            expected to store M⁻¹·r in z.
  * @param matrix_multiply Called as matrix_multiply(p, q); expected to
  *            store A·p in q.
  * @param verbose Print the relative residual and elapsed minutes at the
  *            start of every iteration.
  * @param new_array Called as new_array(N) to create each work array.
  * @param axby Called as axby(a, x, b, y); expected to perform
  *            x := a·x + b·y in place.
  * @param scalar_product Called as scalar_product(u, v); its result is
  *            stored in a Complex.
  * @param compute_norm Called as compute_norm(v); returns a double.
  *
  * @return 0 if the starting residual is exactly zero; otherwise the
  *         zero-based index of the iteration in which the loop was left
  *         (convergence or non-finite residual), or @c max_iterations if
  *         the loop ran out.
  */
 template
 <
     class TArray,
     class TArrayView,
     class Preconditioner,
     class MatrixMultiplication,
     class NewArray      = decltype(default_new_complex_array),
     class AxbyOperation = decltype(default_complex_axby),
     class ScalarProduct = decltype(operator|<Complex>),
     class ComputeNorm   = decltype(default_compute_norm)
 >
 unsigned cg_callbacks
 (
         const TArrayView b,
               TArrayView x,
                   double eps,
                 unsigned min_iterations,
                 unsigned max_iterations,
           Preconditioner apply_preconditioner,
     MatrixMultiplication matrix_multiply,
                     bool verbose        = true,
                 NewArray new_array      = default_new_complex_array,
            AxbyOperation axby           = default_complex_axby,
            ScalarProduct scalar_product = operator|<Complex>,
              ComputeNorm compute_norm   = default_compute_norm
 )
 {
     Timer timer;

     // compute norm of the right hand side
     double bnorm = compute_norm(b);

     // a zero right hand side would turn every "rnorm / bnorm" below into
     // a division by zero; measure the residual absolutely in that case
     // (same convention as in cgs_callbacks)
     if (bnorm == 0.)
         bnorm = 1;

     // get size of the problem
     size_t N = b.size();

     // some auxiliary arrays (search directions etc.)
     TArray p (new_array(N));
     TArray q (new_array(N));
     TArray z (new_array(N));

     // residual; initialized to starting residual using the initial guess
     TArray r (new_array(N));
     matrix_multiply(x, r); // r = A x
     axby (-1., r, 1., b); // r = b - r
     double rnorm = compute_norm(r);

     // if the (non-zero) initial guess seems horribly wrong,
     //    use rather the right hand side as the initial guess
     if (rnorm / bnorm > 1000)
     {
         x.fill(0.);
         axby (0., r, 1., b); // r = b

         // the residual has changed, so must its norm (otherwise the first
         // verbose line would still report the discarded guess)
         rnorm = compute_norm(r);
     }

     // nothing to do if the residual vanishes exactly; iterating anyway
     // would evaluate 0/0 for the projection ratio and spoil x with NaNs
     if (rnorm == 0.)
         return 0;

     // some other scalar variables
     Complex rho_new;        // contains inner product r_i^T · r_i
     Complex rho_old;        // contains inner product r_{i-1}^T · r_{i-1}
     Complex alpha, beta;    // contains projection ratios

     // Iterate

     unsigned k;
     for (k = 0; k < max_iterations; k++)
     {
         int sec = timer.seconds();

         if (verbose)
         {
             std::cout << "\t[cg] Residual relative magnitude after "
                     << k << " iterations: " << rnorm / bnorm
                     << " (" << sec / 60 << " min)\n";
         }

         // apply desired preconditioner
         apply_preconditioner(r, z); // z = M⁻¹r

         // compute projection ρ = r·z
         rho_new = scalar_product(r, z);

         // setup search direction p
         if (k == 0)
         {
             axby (0., p, 1., z); // p = z
         }
         else
         {
             beta = rho_new / rho_old;
             axby (beta, p, 1., z); // p = beta p + z
         }

         // move to next Krylov subspace by multiplying A·p
         matrix_multiply(p, q);

         // compute projection ratio α
         alpha = rho_new / scalar_product(p, q);

         // update the solution and the residual
         axby (1., x, alpha, p); // x = x + α p
         axby (1., r, -alpha, q); // r = r - α q

         // compute and check norm
         rnorm = compute_norm(r);
         if (not std::isfinite(rnorm))
         {
             std::cout << "\t[cg] Oh my god... the norm of the solution is not finite. Something went wrong!\n";
             break;
         }

         // check convergence, but do at least "min_iterations" iterations
         if (k >= min_iterations and rnorm / bnorm < eps)
             break;

         // move to the next iteration: store previous projection
         rho_old = rho_new;
     }

     return k;
 }


 /**
  * @brief Bi-CGSTAB solver.
  *
  * Preconditioned stabilized bi-conjugate-gradient iteration for A·x = b.
  * The matrix and the preconditioner are accessed only through callbacks.
  *
  * @param b Right-hand side.
  * @param x Initial guess on entry. On return it holds the converged
  *          solution, or the most recent iterate if the iteration limit
  *          was reached first.
  * @param eps Relative tolerance; convergence is declared once the norm
  *          of the (half-step or full-step) residual drops below
  *          eps·|b| (eps alone when b is zero).
  * @param min_iterations Accepted for interface compatibility; this
  *          routine does not consult it.
  * @param max_iterations Iteration limit. The counter starts at 1 and the
  *          loop runs while it is below this value, i.e. at most
  *          max_iterations - 1 passes are made.
  * @param apply_preconditioner Called as apply_preconditioner(in, out);
  *          expected to store M⁻¹·in in out.
  * @param matrix_multiply Called as matrix_multiply(in, out); expected to
  *          store A·in in out.
  * @param verbose Print the relative residual and elapsed minutes at the
  *          start of every pass.
  *
  * @return 0 if the starting residual is exactly zero; otherwise the
  *         value of the iteration counter when the loop was left.
  * @throws exception if ρ = (rt|r) or ω vanishes (breakdown).
  */
 template <typename TFunctor1, typename TFunctor2>
 int bicgstab_callbacks (
     const cArrayView b, cArrayView x,
     double eps,
     int min_iterations, int max_iterations,
     TFunctor1 apply_preconditioner,
     TFunctor2 matrix_multiply,
     bool verbose = false
 ) {
     const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

     const size_t N = b.size();
     double bnorm = b.norm();

     // with a zero right hand side "eps * bnorm" could never be undercut;
     // use an absolute tolerance instead (same convention as cgs_callbacks)
     if (bnorm == 0.)
         bnorm = 1;

     cArray x_im1(N), r_im1(N), rt(N), p_i(N), p_im1(N), v_im1(N), phat(N), v_i(N), s(N), shat(N), t(N), x_i(N), r_i(N);
     Complex rho_im1, rho_im2, beta, alpha_i, alpha_im1, omega_i, omega_im1;

     // starting residual r = b - A x; rt is the fixed shadow residual
     x_im1 = x;
     matrix_multiply(x_im1,r_im1);
     rt = r_im1 = b - r_im1;

     // the initial guess already solves the system exactly; iterating
     // would only trip the "rho = 0" breakdown check below
     if (r_im1.norm() == 0.)
         return 0;

     bool converged = false;

     int i;
     for (i = 1; i < max_iterations; i++)
     {
         if (verbose)
         {
             const std::chrono::duration<int> sec = std::chrono::duration_cast<std::chrono::duration<int>>(std::chrono::steady_clock::now()-start);

             std::cout << "\t[Bi-CGSTAB] Residual relative magnitude after "
                     << i << " iterations: " << r_im1.norm() / bnorm
                     << " (" << sec.count()/60 << " min)\n";
         }

         rho_im1 = (rt | r_im1);
         if (std::abs(rho_im1) == 0.)
             throw exception ("[Bi-CGSTAB] Failed, rho = 0.");

         // search direction
         if (i == 1)
         {
             p_i = r_im1;
         }
         else
         {
             beta = (rho_im1 / rho_im2) * (alpha_im1 / omega_im1);
             p_i = r_im1 + beta * (p_im1 - omega_im1 * v_im1);
         }

         // first half-step
         apply_preconditioner(p_i, phat);
         matrix_multiply(phat, v_i);
         alpha_i = rho_im1 / (rt | v_i);
         s = r_im1 - alpha_i * v_i;

         // converged already after the half-step?
         if (s.norm() < eps * bnorm)
         {
             x = x_im1 + alpha_i * phat;
             converged = true;
             break;
         }

         // stabilizing second half-step
         apply_preconditioner(s, shat);
         matrix_multiply(shat, t);
         omega_i = (t|s) / (t|t);

         x_i = x_im1 + alpha_i * phat + omega_i * s;
         r_i = s - omega_i * t;

         if (r_i.norm() < eps * bnorm)
         {
             x = x_i;
             converged = true;
             break;
         }

         if (omega_i == 0.)
             throw exception ("[Bi-CGSTAB] Solver failed, ω = 0.");

         // shift vectors; x_i, r_i and p_i are assigned afresh in the next pass
         x_im1 = std::move(x_i);
         r_im1 = std::move(r_i);
         p_im1 = std::move(p_i);

         // v_i is handed to matrix_multiply as an output buffer in the next
         // pass, so it must keep its storage; swap instead of moving from it
         std::swap(v_im1, v_i);

         // shift
         rho_im2 = rho_im1;
         alpha_im1 = alpha_i;
         omega_im1 = omega_i;
     }

     // iteration limit hit: still give the caller the latest iterate
     // rather than leaving the untouched initial guess in x
     if (not converged)
         x = x_im1;

     return i;
 }


 /**
  * @brief CGS solver.
  *
  * Preconditioned conjugate-gradient-squared iteration for A·x = b.
  * The matrix and the preconditioner are accessed only through callbacks;
  * the solution is refined in place in @c x.
  *
  * @param b Right-hand side.
  * @param x Initial guess on entry; updated in place every pass.
  * @param eps Relative tolerance; the iteration stops once
  *          |r| < eps·|b| (eps alone when b is zero).
  * @param min_iterations Accepted for interface compatibility; this
  *          routine does not consult it.
  * @param max_iterations Iteration limit. The counter starts at 1 and the
  *          loop runs while it is below this value, i.e. at most
  *          max_iterations - 1 passes are made.
  * @param apply_preconditioner Called as apply_preconditioner(in, out);
  *          expected to store M⁻¹·in in out.
  * @param matrix_multiply Called as matrix_multiply(in, out); expected to
  *          store A·in in out.
  * @param verbose Print the relative residual and elapsed minutes at the
  *          start of every pass.
  *
  * @return 0 if the starting residual is already below the tolerance;
  *         otherwise the value of the iteration counter when the loop
  *         was left.
  * @throws exception if ρ = (rt|r) vanishes (breakdown).
  */
 template <typename TFunctor1, typename TFunctor2>
 int cgs_callbacks (
     const cArrayView b, cArrayView x,
     double eps,
     int min_iterations, int max_iterations,
     TFunctor1 apply_preconditioner,
     TFunctor2 matrix_multiply,
     bool verbose = false
 ) {
     const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

     const size_t N = b.size();
     double bnorm = b.norm();

     Complex alpha, beta, rho_1, rho_2;
     cArray r(N), rt(N), p(N), phat(N), q(N), qhat(N), vhat(N), u(N), uhat(N);

     // starting residual r = b - A x; rt is the fixed shadow residual
     matrix_multiply(x,r);
     rt = r = b - r;

     // zero right hand side: measure the residual absolutely
     if (bnorm == 0.)
         bnorm = 1;

     // the initial guess is already good enough
     if (r.norm() < eps * bnorm)
         return 0;

     int i;
     for (i = 1; i < max_iterations; i++)
     {
         if (verbose)
         {
             const std::chrono::duration<int> sec = std::chrono::duration_cast<std::chrono::duration<int>>(std::chrono::steady_clock::now()-start);

             std::cout << "\t[cgs] Residual relative magnitude after "
                     << i << " iterations: " << r.norm() / bnorm
                     << " (" << sec.count()/60 << " min)\n";
         }

         rho_1 = (rt | r);

         if (rho_1 == 0.)
         {
             throw exception ("[cgs] Solver failes, ρ = 0.");
         }

         // search directions
         if (i == 1)
         {
             u = r;
             p = u;
         }
         else
         {
             beta = rho_1 / rho_2;
             u = r + beta * q;
             p = u + beta * (q + beta * p);
         }

         // vhat = A M⁻¹ p
         apply_preconditioner(p, phat);
         matrix_multiply(phat, vhat);

         alpha = rho_1 / (rt | vhat);
         q = u - alpha * vhat;

         // qhat = A M⁻¹ (u + q)
         apply_preconditioner(u + q, uhat);
         matrix_multiply(uhat, qhat);

         // update the solution and the residual
         x += alpha * uhat;
         r -= alpha * qhat;
         rho_2 = rho_1;

         if (r.norm() < eps * bnorm)
             break;
     }

     return i;
 }


 #endif

ArrayView
Array view.
Definition: arrays.h:186

default_new_complex_array
cArray default_new_complex_array(size_t n)
Return new complex array.
Definition: itersolve.h:32

bicgstab_callbacks
int bicgstab_callbacks(const cArrayView b, cArrayView x, double eps, int min_iterations, int max_iterations, TFunctor1 apply_preconditioner, TFunctor2 matrix_multiply, bool verbose=false)
Definition: itersolve.h:277

scalar_product
kernel void scalar_product(global double2 *u, global double2 *v, global double2 *z)
Full scalar product.
Definition: kernels.cl:97

misc.h

complex.h

cgs_callbacks
int cgs_callbacks(const cArrayView b, cArrayView x, double eps, int min_iterations, int max_iterations, TFunctor1 apply_preconditioner, TFunctor2 matrix_multiply, bool verbose=false)
CGS solver.
Definition: itersolve.h:384

cArray
NumberArray< Complex > cArray
Definition: arrays.h:1610

NumberArray
A comfortable number array class.
Definition: arrays.h:171

restrict
#define restrict
Definition: misc.h:88

NumberArray::norm
double norm() const
Compute usual 2-norm.
Definition: arrays.h:972

arrays.h

ArrayView::norm
double norm() const
Two-norm (defined only for scalar data type).
Definition: arrays.h:304

ArrayView::size
size_t size() const
Length of the array (number of elements).
Definition: arrays.h:276

matrix.h

cg_callbacks
unsigned cg_callbacks(const TArrayView b, TArrayView x, double eps, unsigned min_iterations, unsigned max_iterations, Preconditioner apply_preconditioner, MatrixMultiplication matrix_multiply, bool verbose=true, NewArray new_array=default_new_complex_array, AxbyOperation axby=default_complex_axby, ScalarProduct scalar_product=operator|< Complex >, ComputeNorm compute_norm=default_compute_norm)
Conjugate gradients solver.
Definition: itersolve.h:152

exception
CLArrayView exception

default_compute_norm
double default_compute_norm(const cArrayView x)
Compute norm of an array.
Definition: itersolve.h:43

ArrayView::data
virtual T * data()
Pointer to the data.
Definition: arrays.h:280

abs
rArray abs(const cArrayView u)
Definition: arrays.cpp:19

Complex
std::complex< double > Complex
Definition: complex.h:20

Timer
Timing class.
Definition: misc.h:360

Timer::seconds
unsigned seconds()
Return elapsed time in seconds.
Definition: misc.h:375

default_complex_axby
void default_complex_axby(Complex a, cArrayView x, Complex b, const cArrayView y)
Do the $x \leftarrow a\,x + b\,y$ operation.
Definition: itersolve.h:56