Actual source code: ex192.c

  1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
  2: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";

  4: #include <petscmat.h>

  6: int main(int argc,char **args)
  7: {
  8:   Mat            A,RHS,C,F,X,S;
  9:   Vec            u,x,b;
 10:   Vec            xschur,bschur,uschur;
 11:   IS             is_schur;
 13:   PetscMPIInt    size;
 14:   PetscInt       isolver=0,size_schur,m,n,nfact,nsolve,nrhs;
 15:   PetscReal      norm,tol=PETSC_SQRT_MACHINE_EPSILON;
 16:   PetscRandom    rand;
 17:   PetscBool      data_provided,herm,symm,use_lu,cuda = PETSC_FALSE;
 18:   PetscReal      sratio = 5.1/12.;
 19:   PetscViewer    fd;              /* viewer */
 20:   char           solver[256];
 21:   char           file[PETSC_MAX_PATH_LEN]; /* input file name */

 23:   PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
 24:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 25:   if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This is a uniprocessor test");
 26:   /* Determine which type of solver we want to test for */
 27:   herm = PETSC_FALSE;
 28:   symm = PETSC_FALSE;
 29:   PetscOptionsGetBool(NULL,NULL,"-symmetric_solve",&symm,NULL);
 30:   PetscOptionsGetBool(NULL,NULL,"-hermitian_solve",&herm,NULL);
 31:   if (herm) symm = PETSC_TRUE;
 32:   PetscOptionsGetBool(NULL,NULL,"-cuda_solve",&cuda,NULL);
 33:   PetscOptionsGetReal(NULL,NULL,"-tol",&tol,NULL);

 35:   /* Determine file from which we read the matrix A */
 36:   PetscOptionsGetString(NULL,NULL,"-f",file,sizeof(file),&data_provided);
 37:   if (!data_provided) { /* get matrices from PETSc distribution */
 38:     PetscStrncpy(file,"${PETSC_DIR}/share/petsc/datafiles/matrices/",sizeof(file));
 39:     if (symm) {
 40: #if defined (PETSC_USE_COMPLEX)
 41:       PetscStrlcat(file,"hpd-complex-",sizeof(file));
 42: #else
 43:       PetscStrlcat(file,"spd-real-",sizeof(file));
 44: #endif
 45:     } else {
 46: #if defined (PETSC_USE_COMPLEX)
 47:       PetscStrlcat(file,"nh-complex-",sizeof(file));
 48: #else
 49:       PetscStrlcat(file,"ns-real-",sizeof(file));
 50: #endif
 51:     }
 52: #if defined(PETSC_USE_64BIT_INDICES)
 53:     PetscStrlcat(file,"int64-",sizeof(file));
 54: #else
 55:     PetscStrlcat(file,"int32-",sizeof(file));
 56: #endif
 57: #if defined (PETSC_USE_REAL_SINGLE)
 58:     PetscStrlcat(file,"float32",sizeof(file));
 59: #else
 60:     PetscStrlcat(file,"float64",sizeof(file));
 61: #endif
 62:   }
 63:   /* Load matrix A */
 64:   PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);
 65:   MatCreate(PETSC_COMM_WORLD,&A);
 66:   MatLoad(A,fd);
 67:   PetscViewerDestroy(&fd);
 68:   MatGetSize(A,&m,&n);
 69:   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n);

 71:   /* Create dense matrix C and X; C holds true solution with identical columns */
 72:   nrhs = 2;
 73:   PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
 74:   MatCreate(PETSC_COMM_WORLD,&C);
 75:   MatSetSizes(C,m,PETSC_DECIDE,PETSC_DECIDE,nrhs);
 76:   MatSetType(C,MATDENSE);
 77:   MatSetFromOptions(C);
 78:   MatSetUp(C);

 80:   PetscRandomCreate(PETSC_COMM_WORLD,&rand);
 81:   PetscRandomSetFromOptions(rand);
 82:   MatSetRandom(C,rand);
 83:   MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&X);

 85:   /* Create vectors */
 86:   VecCreate(PETSC_COMM_WORLD,&x);
 87:   VecSetSizes(x,n,PETSC_DECIDE);
 88:   VecSetFromOptions(x);
 89:   VecDuplicate(x,&b);
 90:   VecDuplicate(x,&u); /* save the true solution */

 92:   PetscOptionsGetInt(NULL,NULL,"-solver",&isolver,NULL);
 93:   switch (isolver) {
 94: #if defined(PETSC_HAVE_MUMPS)
 95:     case 0:
 96:       PetscStrcpy(solver,MATSOLVERMUMPS);
 97:       break;
 98: #endif
 99: #if defined(PETSC_HAVE_MKL_PARDISO)
100:     case 1:
101:       PetscStrcpy(solver,MATSOLVERMKL_PARDISO);
102:       break;
103: #endif
104:     default:
105:       PetscStrcpy(solver,MATSOLVERPETSC);
106:       break;
107:   }

109: #if defined (PETSC_USE_COMPLEX)
110:   if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */
111:     PetscScalar im = PetscSqrtScalar((PetscScalar)-1.);
112:     PetscScalar val = -1.0;
113:     val = val + im;
114:     MatSetValue(A,1,0,val,INSERT_VALUES);
115:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
116:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
117:   }
118: #endif

120:   PetscOptionsGetReal(NULL,NULL,"-schur_ratio",&sratio,NULL);
121:   if (sratio < 0. || sratio > 1.) {
122:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %f", sratio);
123:   }
124:   size_schur = (PetscInt)(sratio*m);

126:   PetscPrintf(PETSC_COMM_SELF,"Solving with %s: nrhs %D, sym %d, herm %d, size schur %D, size mat %D\n",solver,nrhs,symm,herm,size_schur,m);

128:   /* Test LU/Cholesky Factorization */
129:   use_lu = PETSC_FALSE;
130:   if (!symm) use_lu = PETSC_TRUE;
131: #if defined (PETSC_USE_COMPLEX)
132:   if (isolver == 1) use_lu = PETSC_TRUE;
133: #endif
134:   if (cuda && symm && !herm) use_lu = PETSC_TRUE;

136:   if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
137:     MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
138:     MatConvert(A,MATSEQSBAIJ,MAT_INPLACE_MATRIX,&A);
139:   }

141:   if (use_lu) {
142:     MatGetFactor(A,solver,MAT_FACTOR_LU,&F);
143:   } else {
144:     if (herm) {
145:       MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
146:       MatSetOption(A,MAT_SPD,PETSC_TRUE);
147:     } else {
148:       MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
149:       MatSetOption(A,MAT_SPD,PETSC_FALSE);
150:     }
151:     MatGetFactor(A,solver,MAT_FACTOR_CHOLESKY,&F);
152:   }
153:   ISCreateStride(PETSC_COMM_SELF,size_schur,m-size_schur,1,&is_schur);
154:   MatFactorSetSchurIS(F,is_schur);

156:   ISDestroy(&is_schur);
157:   if (use_lu) {
158:     MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
159:   } else {
160:     MatCholeskyFactorSymbolic(F,A,NULL,NULL);
161:   }

163:   for (nfact = 0; nfact < 3; nfact++) {
164:     Mat AD;

166:     if (!nfact) {
167:       VecSetRandom(x,rand);
168:       if (symm && herm) {
169:         VecAbs(x);
170:       }
171:       MatDiagonalSet(A,x,ADD_VALUES);
172:     }
173:     if (use_lu) {
174:       MatLUFactorNumeric(F,A,NULL);
175:     } else {
176:       MatCholeskyFactorNumeric(F,A,NULL);
177:     }
178:     if (cuda) {
179:       MatFactorGetSchurComplement(F,&S,NULL);
180:       MatSetType(S,MATSEQDENSECUDA);
181:       MatCreateVecs(S,&xschur,&bschur);
182:       MatFactorRestoreSchurComplement(F,&S,MAT_FACTOR_SCHUR_UNFACTORED);
183:     }
184:     MatFactorCreateSchurComplement(F,&S,NULL);
185:     if (!cuda) {
186:       MatCreateVecs(S,&xschur,&bschur);
187:     }
188:     VecDuplicate(xschur,&uschur);
189:     if (nfact == 1 && (!cuda || (herm && symm))) {
190:       MatFactorInvertSchurComplement(F);
191:     }
192:     for (nsolve = 0; nsolve < 2; nsolve++) {
193:       VecSetRandom(x,rand);
194:       VecCopy(x,u);

196:       if (nsolve) {
197:         MatMult(A,x,b);
198:         MatSolve(F,b,x);
199:       } else {
200:         MatMultTranspose(A,x,b);
201:         MatSolveTranspose(F,b,x);
202:       }
203:       /* Check the error */
204:       VecAXPY(u,-1.0,x);  /* u <- (-1.0)x + u */
205:       VecNorm(u,NORM_2,&norm);
206:       if (norm > tol) {
207:         PetscReal resi;
208:         if (nsolve) {
209:           MatMult(A,x,u); /* u = A*x */
210:         } else {
211:           MatMultTranspose(A,x,u); /* u = A*x */
212:         }
213:         VecAXPY(u,-1.0,b);  /* u <- (-1.0)b + u */
214:         VecNorm(u,NORM_2,&resi);
215:         if (nsolve) {
216:           PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatSolve error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
217:         } else {
218:           PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatSolveTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
219:         }
220:       }
221:       VecSetRandom(xschur,rand);
222:       VecCopy(xschur,uschur);
223:       if (nsolve) {
224:         MatMult(S,xschur,bschur);
225:         MatFactorSolveSchurComplement(F,bschur,xschur);
226:       } else {
227:         MatMultTranspose(S,xschur,bschur);
228:         MatFactorSolveSchurComplementTranspose(F,bschur,xschur);
229:       }
230:       /* Check the error */
231:       VecAXPY(uschur,-1.0,xschur);  /* u <- (-1.0)x + u */
232:       VecNorm(uschur,NORM_2,&norm);
233:       if (norm > tol) {
234:         PetscReal resi;
235:         if (nsolve) {
236:           MatMult(S,xschur,uschur); /* u = A*x */
237:         } else {
238:           MatMultTranspose(S,xschur,uschur); /* u = A*x */
239:         }
240:         VecAXPY(uschur,-1.0,bschur);  /* u <- (-1.0)b + u */
241:         VecNorm(uschur,NORM_2,&resi);
242:         if (nsolve) {
243:           PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatFactorSolveSchurComplement error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
244:         } else {
245:           PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
246:         }
247:       }
248:     }
249:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&AD);
250:     if (!nfact) {
251:       MatMatMult(AD,C,MAT_INITIAL_MATRIX,2.0,&RHS);
252:     } else {
253:       MatMatMult(AD,C,MAT_REUSE_MATRIX,2.0,&RHS);
254:     }
255:     MatDestroy(&AD);
256:     for (nsolve = 0; nsolve < 2; nsolve++) {
257:       MatMatSolve(F,RHS,X);

259:       /* Check the error */
260:       MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
261:       MatNorm(X,NORM_FROBENIUS,&norm);
262:       if (norm > tol) {
263:         PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);
264:       }
265:     }
266:     if (isolver == 0) {
267:       Mat spRHS,spRHST,RHST;

269:       MatTranspose(RHS,MAT_INITIAL_MATRIX,&RHST);
270:       MatConvert(RHST,MATSEQAIJ,MAT_INITIAL_MATRIX,&spRHST);
271:       MatCreateTranspose(spRHST,&spRHS);
272:       for (nsolve = 0; nsolve < 2; nsolve++) {
273:         MatMatSolve(F,spRHS,X);

275:         /* Check the error */
276:         MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
277:         MatNorm(X,NORM_FROBENIUS,&norm);
278:         if (norm > tol) {
279:           PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) sparse MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);
280:         }
281:       }
282:       MatDestroy(&spRHST);
283:       MatDestroy(&spRHS);
284:       MatDestroy(&RHST);
285:     }
286:     MatDestroy(&S);
287:     VecDestroy(&xschur);
288:     VecDestroy(&bschur);
289:     VecDestroy(&uschur);
290:   }
291:   /* Free data structures */
292:   MatDestroy(&A);
293:   MatDestroy(&C);
294:   MatDestroy(&F);
295:   MatDestroy(&X);
296:   MatDestroy(&RHS);
297:   PetscRandomDestroy(&rand);
298:   VecDestroy(&x);
299:   VecDestroy(&b);
300:   VecDestroy(&u);
301:   PetscFinalize();
302:   return ierr;
303: }

305: /*TEST

307:    testset:
308:      requires: mkl_pardiso double !complex
309:      args: -solver 1

311:      test:
312:        suffix: mkl_pardiso
313:      test:
314:        requires: cuda
315:        suffix: mkl_pardiso_cuda
316:        args: -cuda_solve
317:        output_file: output/ex192_mkl_pardiso.out
318:      test:
319:        suffix: mkl_pardiso_1
320:        args: -symmetric_solve
321:        output_file: output/ex192_mkl_pardiso_1.out
322:      test:
323:        requires: cuda
324:        suffix: mkl_pardiso_cuda_1
325:        args: -symmetric_solve -cuda_solve
326:        output_file: output/ex192_mkl_pardiso_1.out
327:      test:
328:        suffix: mkl_pardiso_3
329:        args: -symmetric_solve -hermitian_solve
330:        output_file: output/ex192_mkl_pardiso_3.out
331:      test:
332:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
333:        suffix: mkl_pardiso_cuda_3
334:        args: -symmetric_solve -hermitian_solve -cuda_solve
335:        output_file: output/ex192_mkl_pardiso_3.out

337:    testset:
338:      requires: mumps double !complex
339:      args: -solver 0

341:      test:
342:        suffix: mumps
343:      test:
344:        requires: cuda
345:        suffix: mumps_cuda
346:        args: -cuda_solve
347:        output_file: output/ex192_mumps.out
348:      test:
349:        suffix: mumps_2
350:        args: -symmetric_solve
351:        output_file: output/ex192_mumps_2.out
352:      test:
353:        requires: cuda
354:        suffix: mumps_cuda_2
355:        args: -symmetric_solve -cuda_solve
356:        output_file: output/ex192_mumps_2.out
357:      test:
358:        suffix: mumps_3
359:        args: -symmetric_solve -hermitian_solve
360:        output_file: output/ex192_mumps_3.out
361:      test:
362:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
363:        suffix: mumps_cuda_3
364:        args: -symmetric_solve -hermitian_solve -cuda_solve
365:        output_file: output/ex192_mumps_3.out

367: TEST*/