Conversation
The string formatting done in a hot loop ends up being _very_ expensive, 10x slowdown
| // TODO: handle fourth order | ||
| auto set_stencil(const Mesh& localmesh, bool fourth_order) { | ||
| OperatorStencil<IndPerp> stencil; | ||
| IndexOffset<IndPerp> zero; |
There was a problem hiding this comment.
warning: variable 'zero' of type 'IndexOffset' (aka 'IndexOffset<SpecificInd<IND_TYPE::IND_PERP>>') can be declared 'const' [misc-const-correctness]
| IndexOffset<IndPerp> zero; | |
| IndexOffset<IndPerp> const zero; |
| }); | ||
| } | ||
|
|
||
| std::vector offsetsVec(offsets.begin(), offsets.end()); |
There was a problem hiding this comment.
warning: variable 'offsetsVec' of type 'std::vector<IndexOffset<SpecificInd<IND_TYPE::IND_PERP>>, std::allocator<IndexOffset<SpecificInd<IND_TYPE::IND_PERP>>>>' (aka 'vector<IndexOffset<SpecificInd<IND_TYPE::IND_PERP>>, std::allocator<IndexOffset<SpecificInd<IND_TYPE::IND_PERP>>>>') can be declared 'const' [misc-const-correctness]
| std::vector offsetsVec(offsets.begin(), offsets.end()); | |
| std::vector const offsetsVec(offsets.begin(), offsets.end()); |
| } | ||
| } | ||
|
|
||
| stencil.add([](IndPerp UNUSED(ind)) -> bool { return true; }, {zero}); |
There was a problem hiding this comment.
warning: no header providing "UNUSED" is directly included [misc-include-cleaner]
src/invert/laplace/impls/petsc/petsc_laplace.cxx:27:
+ #include "bout/unused.hxx"| fourth_order( | ||
| (*opts)["fourth_order"].doc("Use fourth order stencil").withDefault(false)), | ||
| lib(opts) { | ||
| indexer(std::make_shared<GlobalIndexer<FieldPerp>>( |
There was a problem hiding this comment.
warning: no header providing "std::make_shared" is directly included [misc-include-cleaner]
src/invert/laplace/impls/petsc/petsc_laplace.cxx:27:
+ #include <memory>| } | ||
| } | ||
| // NOTE: Only A0 is the A from setCoefA () | ||
| const BoutReal A0 = A[i]; |
There was a problem hiding this comment.
warning: 'A0' is confusable with 'AO' [misc-confusable-identifiers]
const BoutReal A0 = A[i];
^Additional context
/usr/lib/petscdir/petsc3.22/x86_64-linux-gnu-real/include/petscao.h:19: other declaration found here
typedef struct _p_AO *AO;
^|
|
||
| BoutReal dx = coords->dx[i]; | ||
| BoutReal dx2 = SQ(dx); | ||
| BoutReal dz = coords->dz[i]; |
There was a problem hiding this comment.
warning: variable 'dz' of type 'BoutReal' (aka 'double') can be declared 'const' [misc-const-correctness]
| BoutReal dz = coords->dz[i]; | |
| BoutReal const dz = coords->dz[i]; |
| BoutReal dx = coords->dx[i]; | ||
| BoutReal dx2 = SQ(dx); | ||
| BoutReal dz = coords->dz[i]; | ||
| BoutReal dz2 = SQ(dz); |
There was a problem hiding this comment.
warning: variable 'dz2' of type 'BoutReal' (aka 'double') can be declared 'const' [misc-const-correctness]
| BoutReal dz2 = SQ(dz); | |
| BoutReal const dz2 = SQ(dz); |
| BoutReal dx2 = SQ(dx); | ||
| BoutReal dz = coords->dz[i]; | ||
| BoutReal dz2 = SQ(dz); | ||
| BoutReal dxdz = dx * dz; |
There was a problem hiding this comment.
warning: variable 'dxdz' of type 'BoutReal' (aka 'double') can be declared 'const' [misc-const-correctness]
| BoutReal dxdz = dx * dz; | |
| BoutReal const dxdz = dx * dz; |
|
|
||
| // Need this here to ensure PETSc isn't finalised until after the global mesh, | ||
| // otherwise we get problems from `MPI_Comm_free` on the X communicator | ||
| PetscLib lib{}; |
There was a problem hiding this comment.
warning: variable 'lib' of type 'PetscLib' can be declared 'const' [misc-const-correctness]
| PetscLib lib{}; | |
| PetscLib const lib{}; |
|
|
||
| BoutInitialise(argc, argv); | ||
|
|
||
| PetscLib lib{}; |
There was a problem hiding this comment.
warning: variable 'lib' of type 'PetscLib' can be declared 'const' [misc-const-correctness]
| PetscLib lib{}; | |
| PetscLib const lib{}; |
|
Hmm, the 3D build is failing with
even though I explicitly check that PETSc was compiled with mumps. I guess it's fine to always use |
As well as making this implementation a lot cleaner, this is necessary for the Z parallelisation so that we can use the
GlobalIndexer, otherwise it's really horrible to work out the domain decomposition (as the grid is Z-first and the processors are X-first, and the matrix may have different number of rows on each processor due to the X boundaries).I had to remove the
TRACEmacro from thePetscMatrixas this gets called in a hot loop ((nx * nz)^2times!) and the string formatting really adds up -- yet more reason to removeMsgStack?In debug builds, this has a bit of a weird effect on performance:
next:this PR:
The solve time goes down, but the setup time goes up.