Actual source code: pipelcg.c

  1: #include <petsc/private/kspimpl.h>
  2: #include <petsc/private/vecimpl.h>

  4: #define offset(j)      PetscMax(((j) - (2*l)), 0)
  5: #define shift(i,j)     ((i) - offset((j)))
  6: #define G(i,j)         (plcg->G[((j)*(2*l+1))+(shift((i),(j))) ])
  7: #define G_noshift(i,j) (plcg->G[((j)*(2*l+1))+(i)])
  8: #define alpha(i)       (plcg->alpha[(i)])
  9: #define gamma(i)       (plcg->gamma[(i)])
 10: #define delta(i)       (plcg->delta[(i)])
 11: #define sigma(i)       (plcg->sigma[(i)])
 12: #define req(i)         (plcg->req[(i)])

 14: typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
 15: struct KSP_CG_PIPE_L_s {
 16:   PetscInt    l;          /* pipeline depth */
 17:   Vec         *Z;         /* Z vectors (shifted base) */
 18:   Vec         *U;         /* U vectors (unpreconditioned shifted base) */
 19:   Vec         *V;         /* V vectors (original base) */
 20:   Vec         *Q;         /* Q vectors (auxiliary bases) */
 21:   Vec         p;          /* work vector */
 22:   PetscScalar *G;         /* such that Z = VG (band matrix)*/
 23:   PetscScalar *gamma,*delta,*alpha;
 24:   PetscReal   lmin,lmax;  /* min and max eigen values estimates to compute base shifts */
 25:   PetscReal   *sigma;     /* base shifts */
 26:   MPI_Request *req;       /* request array for asynchronous global collective */
 27:   PetscBool   show_rstrt; /* flag to show restart information in output (default: not shown) */
 28: };

 30: /*
 31:   KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.

 33:   This is called once, usually automatically by KSPSolve() or KSPSetUp()
 34:   but can be called directly by KSPSetUp()
 35: */
 36: static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
 37: {
 39:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
 40:   PetscInt       l=plcg->l,max_it=ksp->max_it;
 41:   MPI_Comm       comm;

 44:   comm = PetscObjectComm((PetscObject)ksp);
 45:   if (max_it < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: max_it argument must be positive.",((PetscObject)ksp)->type_name);
 46:   if (l < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be positive.",((PetscObject)ksp)->type_name);
 47:   if (l > max_it) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be less than max_it.",((PetscObject)ksp)->type_name);

 49:   KSPSetWorkVecs(ksp,1); /* get work vectors needed by PIPELCG */
 50:   plcg->p = ksp->work[0];

 52:   VecDuplicateVecs(plcg->p,PetscMax(3,l+1),&plcg->Z);
 53:   VecDuplicateVecs(plcg->p,3,&plcg->U);
 54:   VecDuplicateVecs(plcg->p,3,&plcg->V);
 55:   VecDuplicateVecs(plcg->p,3*(l-1)+1,&plcg->Q);
 56:   PetscCalloc1(2,&plcg->alpha);
 57:   PetscCalloc1(l,&plcg->sigma);

 59:   return(0);
 60: }

 62: static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
 63: {
 64:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
 65:   PetscInt       l=plcg->l;

 69:   PetscFree(plcg->sigma);
 70:   PetscFree(plcg->alpha);
 71:   VecDestroyVecs(PetscMax(3,l+1),&plcg->Z);
 72:   VecDestroyVecs(3,&plcg->U);
 73:   VecDestroyVecs(3,&plcg->V);
 74:   VecDestroyVecs(3*(l-1)+1,&plcg->Q);
 75:   return(0);
 76: }

 78: static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
 79: {

 83:   KSPReset_PIPELCG(ksp);
 84:   KSPDestroyDefault(ksp);
 85:   return(0);
 86: }

 88: static PetscErrorCode KSPSetFromOptions_PIPELCG(PetscOptionItems *PetscOptionsObject,KSP ksp)
 89: {
 91:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
 92:   PetscBool      flag=PETSC_FALSE;

 95:   PetscOptionsHead(PetscOptionsObject,"KSP PIPELCG options");
 96:   PetscOptionsInt("-ksp_pipelcg_pipel","Pipeline length","",plcg->l,&plcg->l,&flag);
 97:   if (!flag) plcg->l = 1;
 98:   PetscOptionsReal("-ksp_pipelcg_lmin","Estimate for smallest eigenvalue","",plcg->lmin,&plcg->lmin,&flag);
 99:   if (!flag) plcg->lmin = 0.0;
100:   PetscOptionsReal("-ksp_pipelcg_lmax","Estimate for largest eigenvalue","",plcg->lmax,&plcg->lmax,&flag);
101:   if (!flag) plcg->lmax = 0.0;
102:   PetscOptionsBool("-ksp_pipelcg_monitor","Output information on restarts when they occur? (default: 0)","",plcg->show_rstrt,&plcg->show_rstrt,&flag);
103:   if (!flag) plcg->show_rstrt = PETSC_FALSE;
104:   PetscOptionsTail();
105:   return(0);
106: }

108: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
109: {

113: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
114:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
115: #else
116:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
117:   *request = MPI_REQUEST_NULL;
118: #endif
119:   return(0);
120: }

122: static PetscErrorCode KSPView_PIPELCG(KSP ksp,PetscViewer viewer)
123: {
124:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
126:   PetscBool      iascii=PETSC_FALSE,isstring=PETSC_FALSE;

129:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
130:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
131:   if (iascii) {
132:     PetscViewerASCIIPrintf(viewer,"  Pipeline depth: %D\n", plcg->l);
133:     PetscViewerASCIIPrintf(viewer,"  Minimal eigenvalue estimate %g\n",plcg->lmin);
134:     PetscViewerASCIIPrintf(viewer,"  Maximal eigenvalue estimate %g\n",plcg->lmax);
135:   } else if (isstring) {
136:     PetscViewerStringSPrintf(viewer,"  Pipeline depth: %D\n", plcg->l);
137:     PetscViewerStringSPrintf(viewer,"  Minimal eigenvalue estimate %g\n",plcg->lmin);
138:     PetscViewerStringSPrintf(viewer,"  Maximal eigenvalue estimate %g\n",plcg->lmax);
139:   }
140:   return(0);
141: }

143: static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
144: {
145:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
146:   Mat            A=NULL,Pmat=NULL;
147:   PetscInt       it=0,max_it=ksp->max_it,l=plcg->l,i=0,j=0,k=0;
148:   PetscInt       start=0,middle=0,end=0;
149:   Vec            *Z=plcg->Z,*U=plcg->U,*V=plcg->V,*Q=plcg->Q;
150:   Vec            x=NULL,p=NULL,temp=NULL;
151:   PetscScalar    sum_dummy=0.0,eta=0.0,zeta=0.0,lambda=0.0;
152:   PetscReal      dp=0.0,tmp=0.0,beta=0.0,invbeta2=0.0;
153:   MPI_Comm       comm;

157:   x   = ksp->vec_sol;
158:   p   = plcg->p;

160:   comm = PetscObjectComm((PetscObject)ksp);
161:   PCGetOperators(ksp->pc,&A,&Pmat);

163:   for (it = 0; it < max_it+l; ++it) {
164:     /* ----------------------------------- */
165:     /* Multiplication  z_{it+1} =  Az_{it} */
166:     /* ----------------------------------- */
167:     /* Shift the U vector pointers */
168:     temp = U[2];
169:     for (i = 2; i>0; i--) {
170:       U[i] = U[i-1];
171:     }
172:     U[0] = temp;
173:     if (it < l) {
174:       /* SpMV and Sigma-shift and Prec */
175:       MatMult(A,Z[l-it],U[0]);
176:       VecAXPY(U[0],-sigma(it),U[1]);
177:       KSP_PCApply(ksp,U[0],Z[l-it-1]);
178:       if (it < l-1) {
179:         VecCopy(Z[l-it-1],Q[3*it]);
180:       }
181:     } else {
182:       /* Shift the Z vector pointers */
183:       temp = Z[PetscMax(l,2)];
184:       for (i = PetscMax(l,2); i > 0; --i) {
185:         Z[i] = Z[i-1];
186:       }
187:       Z[0] = temp;
188:       /* SpMV and Prec */
189:       MatMult(A,Z[1],U[0]);
190:       KSP_PCApply(ksp,U[0],Z[0]);
191:     }

193:     /* ----------------------------------- */
194:     /* Adjust the G matrix */
195:     /* ----------------------------------- */
196:     if (it >= l) {
197:       if (it == l) {
198:         /* MPI_Wait for G(0,0),scale V0 and Z and U and Q vectors with 1/beta */
199:         MPI_Wait(&req(0),MPI_STATUS_IGNORE);
200:         beta = PetscSqrtReal(PetscRealPart(G(0,0)));
201:         G(0,0) = 1.0;
202:         VecAXPY(V[0],1.0/beta,p); /* this assumes V[0] to be zero initially */
203:         for (j = 0; j <= PetscMax(l,2); ++j) {
204:           VecScale(Z[j],1.0/beta);
205:         }
206:         for (j = 0; j <= 2; ++j) {
207:           VecScale(U[j],1.0/beta);
208:         }
209:         for (j = 0; j < l-1; ++j) {
210:           VecScale(Q[3*j],1.0/beta);
211:         }
212:       }

214:       /* MPI_Wait until the dot products,started l iterations ago,are completed */
215:       MPI_Wait(&req(it-l+1),MPI_STATUS_IGNORE);
216:       if (it >= 2*l) {
217:         for (j = PetscMax(0,it-3*l+1); j <= it-2*l; j++) {
218:           G(j,it-l+1) = G(it-2*l+1,j+l); /* exploit symmetry in G matrix */
219:         }
220:       }

222:       if (it <= 2*l-1) {
223:         invbeta2 = 1.0 / (beta * beta);
224:         /* Scale columns 1 up to l of G with 1/beta^2 */
225:         for (j = PetscMax(it-3*l+1,0); j <= it-l+1; ++j) {
226:           G(j,it-l+1) *= invbeta2;
227:         }
228:       }

230:       for (j = PetscMax(it-2*l+2,0); j <= it-l; ++j) {
231:         sum_dummy = 0.0;
232:         for (k = PetscMax(it-3*l+1,0); k <= j-1; ++k) {
233:           sum_dummy = sum_dummy + G(k,j) * G(k,it-l+1);
234:         }
235:         G(j,it-l+1) = (G(j,it-l+1) - sum_dummy) / G(j,j);
236:       }

238:       sum_dummy = 0.0;
239:       for (k = PetscMax(it-3*l+1,0); k <= it-l; ++k) {
240:         sum_dummy = sum_dummy + G(k,it-l+1) * G(k,it-l+1);
241:       }

243:       tmp = PetscRealPart(G(it-l+1,it-l+1) - sum_dummy);
244:       /* Breakdown check */
245:       if (tmp < 0) {
246:         if (plcg->show_rstrt) {
247:           PetscPrintf(comm,"Sqrt breakdown in iteration %D: sqrt argument is %e. Iteration was restarted.\n",ksp->its+1,(double)tmp);
248:         }
249:         /* End hanging dot-products in the pipeline before exiting for-loop */
250:         start = it-l+2;
251:         end = PetscMin(it+1,max_it+1);  /* !warning! 'it' can actually be greater than 'max_it' */
252:         for (i = start; i < end; ++i) {
253:           MPI_Wait(&req(i),MPI_STATUS_IGNORE);
254:         }
255:         break;
256:       }
257:       G(it-l+1,it-l+1) = PetscSqrtReal(tmp);

259:       if (it < 2*l) {
260:         if (it == l) {
261:           gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)) / G(it-l,it-l);
262:         } else {
263:           gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)
264:                          - delta(it-l-1) * G(it-l-1,it-l)) / G(it-l,it-l);
265:         }
266:         delta(it-l) = G(it-l+1,it-l+1) / G(it-l,it-l);
267:       } else {
268:         gamma(it-l) = (G(it-l,it-l) * gamma(it-2*l)
269:                        + G(it-l,it-l+1) * delta(it-2*l)
270:                        - G(it-l-1,it-l) * delta(it-l-1)) / G(it-l,it-l);
271:         delta(it-l) = (G(it-l+1,it-l+1) * delta(it-2*l)) / G(it-l,it-l);
272:       }

274:       /* -------------------------------------------------- */
275:       /* Recursively compute the next V, Q, Z and U vectors */
276:       /* -------------------------------------------------- */
277:       /* Shift the V vector pointers */
278:       temp = V[2];
279:       for (i = 2; i>0; i--) {
280:         V[i] = V[i-1];
281:       }
282:       V[0] = temp;

284:       /* Recurrence V vectors */
285:       if (l == 1) {
286:         VecCopy(Z[1],V[0]);
287:       } else {
288:         VecCopy(Q[0],V[0]);
289:       }
290:       if (it == l) {
291:         VecAXPY(V[0],sigma(0)-gamma(it-l),V[1]);
292:       } else {
293:         alpha(0) = sigma(0)-gamma(it-l);
294:         alpha(1) = -delta(it-l-1);
295:         VecMAXPY(V[0],2,&alpha(0),&V[1]);
296:       }
297:       VecScale(V[0],1.0/delta(it-l));

299:       /* Recurrence Q vectors */
300:       for (j = 0; j < l-1; ++j) {
301:         /* Shift the Q vector pointers */
302:         temp = Q[3*j+2];
303:         for (i = 2; i>0; i--) {
304:           Q[3*j+i] = Q[3*j+i-1];
305:         }
306:         Q[3*j] = temp;

308:         if (j < l-2) {
309:           VecCopy(Q[3*(j+1)],Q[3*j]);
310:         } else {
311:           VecCopy(Z[1],Q[3*j]);
312:         }
313:         if (it == l) {
314:           VecAXPY(Q[3*j],sigma(j+1)-gamma(it-l),Q[3*j+1]);
315:         } else {
316:           alpha(0) = sigma(j+1)-gamma(it-l);
317:           alpha(1) = -delta(it-l-1);
318:           VecMAXPY(Q[3*j],2,&alpha(0),&Q[3*j+1]);
319:         }
320:         VecScale(Q[3*j],1.0/delta(it-l));
321:       }

323:       /* Recurrence Z and U vectors */
324:       if (it == l) {
325:         VecAXPY(Z[0],-gamma(it-l),Z[1]);
326:         VecAXPY(U[0],-gamma(it-l),U[1]);
327:       } else {
328:         alpha(0) = -gamma(it-l);
329:         alpha(1) = -delta(it-l-1);
330:         VecMAXPY(Z[0],2,&alpha(0),&Z[1]);
331:         VecMAXPY(U[0],2,&alpha(0),&U[1]);
332:       }
333:       VecScale(Z[0],1.0/delta(it-l));
334:       VecScale(U[0],1.0/delta(it-l));
335:     }

337:     /* ---------------------------------------- */
338:     /* Compute and communicate the dot products */
339:     /* ---------------------------------------- */
340:     if (it < l) {
341:       for (j = 0; j < it+2; ++j) {
342:         (*U[0]->ops->dot_local)(U[0],Z[l-j],&G(j,it+1)); /* dot-products (U[0],Z[j]) */
343:       }
344:       MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,it+1),it+2,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
345:     } else if ((it >= l) && (it < max_it)) {
346:       middle = it-l+2;
347:       end = it+2;
348:       (*U[0]->ops->dot_local)(U[0],V[0],&G(it-l+1,it+1)); /* dot-product (U[0],V[0]) */
349:       for (j = middle; j < end; ++j) {
350:         (*U[0]->ops->dot_local)(U[0],plcg->Z[it+1-j],&G(j,it+1)); /* dot-products (U[0],Z[j]) */
351:       }
352:       MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(it-l+1,it+1),l+1,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
353:     }

355:     /* ----------------------------------------- */
356:     /* Compute solution vector and residual norm */
357:     /* ----------------------------------------- */
358:     if (it >= l) {
359:       if (it == l) {
360:         if (ksp->its != 0) {
361:           ++ ksp->its;
362:         }
363:         eta  = gamma(0);
364:         zeta = beta;
365:         VecCopy(V[1],p);
366:         VecScale(p,1.0/eta);
367:         VecAXPY(x,zeta,p);
368:         dp   = beta;
369:       } else if (it > l) {
370:         k = it-l;
371:         ++ ksp->its;
372:         lambda = delta(k-1)/eta;
373:         eta  = gamma(k) - lambda * delta(k-1);
374:         zeta = -lambda * zeta;
375:         VecScale(p,-delta(k-1)/eta);
376:         VecAXPY(p,1.0/eta,V[1]);
377:         VecAXPY(x,zeta,p);
378:         dp   = PetscAbsScalar(zeta);
379:       }
380:       ksp->rnorm = dp;
381:       KSPLogResidualHistory(ksp,dp);
382:       KSPMonitor(ksp,ksp->its,dp);
383:       (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);

385:       if (ksp->its >= max_it && !ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
386:       if (ksp->reason) {
387:         /* End hanging dot-products in the pipeline before exiting for-loop */
388:         start = it-l+2;
389:         end = PetscMin(it+2,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
390:         for (i = start; i < end; ++i) {
391:           MPI_Wait(&req(i),MPI_STATUS_IGNORE);
392:         }
393:         break;
394:       }
395:     }
396:   } /* End inner for loop */
397:   return(0);
398: }

400: static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
401: {
402:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
403:   PetscInt       i=0,j=0,l=plcg->l,max_it=ksp->max_it;

407:   for (i = 0; i < PetscMax(3,l+1); ++i) {
408:     VecSet(plcg->Z[i],0.0);
409:   }
410:   for (i = 1; i < 3; ++i) {
411:     VecSet(plcg->U[i],0.0);
412:   }
413:   for (i = 0; i < 3; ++i) {
414:     VecSet(plcg->V[i],0.0);
415:   }
416:   for (i = 0; i < 3*(l-1)+1; ++i) {
417:     VecSet(plcg->Q[i],0.0);
418:   }
419:   for (j = 0; j < (max_it+1); ++j) {
420:     gamma(j) = 0.0;
421:     delta(j) = 0.0;
422:     for (i = 0; i < (2*l+1); ++i) {
423:       G_noshift(i,j) = 0.0;
424:     }
425:   }
426:   return(0);
427: }

429: /*
430:   KSPSolve_PIPELCG - This routine actually applies the pipelined(l) conjugate gradient method
431: */
432: static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
433: {
435:   KSP_CG_PIPE_L  *plcg = (KSP_CG_PIPE_L*)ksp->data;
436:   Mat            A=NULL,Pmat=NULL;
437:   Vec            b=NULL,x=NULL,p=NULL;
438:   PetscInt       max_it=ksp->max_it,l=plcg->l;
439:   PetscInt       i=0,outer_it=0,curr_guess_zero=0;
440:   PetscReal      lmin=plcg->lmin,lmax=plcg->lmax;
441:   PetscBool      diagonalscale=PETSC_FALSE;
442:   MPI_Comm       comm;

445:   comm = PetscObjectComm((PetscObject)ksp);
446:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
447:   if (diagonalscale) {
448:     SETERRQ1(comm,PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
449:   }

451:   x = ksp->vec_sol;
452:   b = ksp->vec_rhs;
453:   p = plcg->p;

455:   PetscCalloc1((max_it+1)*(2*l+1),&plcg->G);
456:   PetscCalloc1(max_it+1,&plcg->gamma);
457:   PetscCalloc1(max_it+1,&plcg->delta);
458:   PetscCalloc1(max_it+1,&plcg->req);

460:   PCGetOperators(ksp->pc,&A,&Pmat);

462:   for (i = 0; i < l; ++i) {
463:     sigma(i) = (0.5*(lmin+lmax) + (0.5*(lmax-lmin) * PetscCosReal(PETSC_PI*(2.0*i+1.0)/(2.0*l))));
464:   }

466:   ksp->its = 0;
467:   outer_it = 0;
468:   curr_guess_zero = !! ksp->guess_zero;

470:   while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
471:     /* RESTART LOOP */
472:     if (!curr_guess_zero) {
473:       KSP_MatMult(ksp,A,x,plcg->U[0]);  /* u <- b - Ax */
474:       VecAYPX(plcg->U[0],-1.0,b);
475:     } else {
476:       VecCopy(b,plcg->U[0]);            /* u <- b (x is 0) */
477:     }
478:     KSP_PCApply(ksp,plcg->U[0],p);      /* p <- Bu */

480:     if (outer_it > 0) {
481:       /* Re-initialize Z,U,V,Q,gamma,delta,G after restart occurred */
482:       KSPSolve_ReInitData_PIPELCG(ksp);
483:     }

485:     (*plcg->U[0]->ops->dot_local)(plcg->U[0],p,&G(0,0));
486:     MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,0),1,MPIU_SCALAR,MPIU_SUM,comm,&req(0));
487:     VecCopy(p,plcg->Z[l]);

489:     KSPSolve_InnerLoop_PIPELCG(ksp);

491:     if (ksp->reason) break; /* convergence or divergence */
492:     ++ outer_it;
493:     curr_guess_zero = 0;
494:   }

496:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
497:   PetscFree(plcg->G);
498:   PetscFree(plcg->gamma);
499:   PetscFree(plcg->delta);
500:   PetscFree(plcg->req);
501:   return(0);
502: }

504: /*MC
505:     KSPPIPELCG - Deep pipelined (length l) Conjugate Gradient method. This method has only a single non-blocking global
506:     reduction per iteration, compared to 2 blocking reductions for standard CG. The reduction is overlapped by the
507:     matrix-vector product and preconditioner application of the next l iterations. The pipeline length l is a parameter
508:     of the method.

510:     Options Database Keys:
511: +   -ksp_pipelcg_pipel - pipelined length
512: .   -ksp_pipelcg_lmin - approximation to the smallest eigenvalue of the preconditioned operator (default: 0.0)
513: .   -ksp_pipelcg_lmax - approximation to the largest eigenvalue of the preconditioned operator (default: 0.0)
514: .   -ksp_pipelcg_monitor - output where/why the method restarts when a sqrt breakdown occurs
515: -   see KSPSolve() for additional options

517:     Level: advanced

519:     Notes:
520:     MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
521:     performance of pipelined methods. See the FAQ on the PETSc website for details.

523:     Contributed by:
524:     Siegfried Cools, University of Antwerp, Dept. Mathematics and Computer Science,
525:     funded by Flemish Research Foundation (FWO) grant number 12H4617N.

527:     Example usage:
528:     [*] KSP ex2, no preconditioner, pipel = 2, lmin = 0.0, lmax = 8.0 :
529:         $mpiexec -n 14 ./ex2 -m 1000 -n 1000 -ksp_type pipelcg -pc_type none -ksp_norm_type natural
530:         -ksp_rtol 1e-10 -ksp_max_it 1000 -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 8.0 -log_view
531:     [*] SNES ex48, bjacobi preconditioner, pipel = 3, lmin = 0.0, lmax = 2.0, show restart information :
532:         $mpiexec -n 14 ./ex48 -M 150 -P 100 -ksp_type pipelcg -pc_type bjacobi -ksp_rtol 1e-10 -ksp_pipelcg_pipel 3
533:         -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 2.0 -ksp_pipelcg_monitor -log_view

535:     References:
536:     [*] J. Cornelis, S. Cools and W. Vanroose,
537:         "The Communication-Hiding Conjugate Gradient Method with Deep Pipelines"
538:         Submitted to SIAM Journal on Scientific Computing (SISC), 2018.
539:     [*] S. Cools, J. Cornelis and W. Vanroose,
540:         "Numerically Stable Recurrence Relations for the Communication Hiding Pipelined Conjugate Gradient Method"
541:         Submitted to IEEE Transactions on Parallel and Distributed Systems, 2019.

543: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSPCG, KSPPIPECG, KSPPIPECGRR, KSPPGMRES,
544:     KSPPIPEBCGS, KSPSetPCSide()
545: M*/
546: PETSC_EXTERN
547: PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
548: {
550:   KSP_CG_PIPE_L  *plcg = NULL;

553:   PetscNewLog(ksp,&plcg);
554:   ksp->data = (void*)plcg;

556:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
557:   KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);

559:   ksp->ops->setup          = KSPSetUp_PIPELCG;
560:   ksp->ops->solve          = KSPSolve_PIPELCG;
561:   ksp->ops->reset          = KSPReset_PIPELCG;
562:   ksp->ops->destroy        = KSPDestroy_PIPELCG;
563:   ksp->ops->view           = KSPView_PIPELCG;
564:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
565:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
566:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
567:   return(0);
568: }