46 #ifndef MUELU_SAPFACTORY_KOKKOS_DEF_HPP 47 #define MUELU_SAPFACTORY_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 52 #include "KokkosKernels_Handle.hpp" 53 #include "KokkosSparse_spgemm.hpp" 54 #include "KokkosSparse_spmv.hpp" 58 #include <Xpetra_Matrix.hpp> 59 #include <Xpetra_IteratorOps.hpp> 65 #include "MueLu_PerfUtils.hpp" 67 #include "MueLu_TentativePFactory.hpp" 68 #include "MueLu_Utilities_kokkos.hpp" 74 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
75 RCP<const ParameterList> SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList()
const {
76 RCP<ParameterList> validParamList = rcp(
new ParameterList());
78 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 86 #undef SET_VALID_ENTRY 88 validParamList->set< RCP<const FactoryBase> >(
"A", Teuchos::null,
"Generating factory of the matrix A used during the prolongator smoothing process");
89 validParamList->set< RCP<const FactoryBase> >(
"P", Teuchos::null,
"Tentative prolongator factory");
92 ParameterList norecurse;
93 norecurse.disableRecursiveValidation();
94 validParamList->set<ParameterList> (
"matrixmatrix: kernel params", norecurse,
"MatrixMatrix kernel parameters");
97 return validParamList;
100 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
101 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level& fineLevel, Level& coarseLevel)
const {
102 Input(fineLevel,
"A");
106 RCP<const FactoryBase> initialPFact = GetFactory(
"P");
107 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory(
"Ptent"); }
108 coarseLevel.DeclareInput(
"P", initialPFact.get(),
this);
111 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
112 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& fineLevel, Level& coarseLevel)
const {
113 return BuildP(fineLevel, coarseLevel);
116 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
117 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::BuildP(Level& fineLevel, Level& coarseLevel)
const {
118 FactoryMonitor m(*
this,
"Prolongator smoothing", coarseLevel);
121 DeviceType::execution_space::print_configuration(GetOStream(
Runtime1));
123 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;
128 RCP<const FactoryBase> initialPFact = GetFactory(
"P");
129 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory(
"Ptent"); }
130 const ParameterList& pL = GetParameterList();
133 RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,
"A");
134 RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >(
"P", initialPFact.get());
136 if(restrictionMode_) {
137 SubFactoryMonitor m2(*
this,
"Transpose A", coarseLevel);
139 A = Utilities_kokkos::Transpose(*A,
true);
146 RCP<ParameterList> APparams;
147 if(pL.isSublist(
"matrixmatrix: kernel params"))
148 APparams=rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
150 APparams= rcp(
new ParameterList);
151 if (coarseLevel.IsAvailable(
"AP reuse data",
this)) {
152 GetOStream(static_cast<MsgType>(
Runtime0 |
Test)) <<
"Reusing previous AP data" << std::endl;
154 APparams = coarseLevel.Get< RCP<ParameterList> >(
"AP reuse data",
this);
156 if (APparams->isParameter(
"graph"))
157 finalP = APparams->get< RCP<Matrix> >(
"graph");
160 APparams->set(
"compute global constants: temporaries",APparams->get(
"compute global constants: temporaries",
false));
161 APparams->set(
"compute global constants",APparams->get(
"compute global constants",
false));
163 const SC
dampingFactor = as<SC>(pL.get<
double>(
"sa: damping factor"));
164 const LO maxEigenIterations = as<LO>(pL.get<
int>(
"sa: eigenvalue estimate num iterations"));
165 const bool estimateMaxEigen = pL.get<
bool>(
"sa: calculate eigenvalue estimate");
166 const bool useAbsValueRowSum = pL.get<
bool> (
"sa: use rowsumabs diagonal scaling");
167 const bool doQRStep = pL.get<
bool>(
"tentative: calculate qr");
168 const bool enforceConstraints= pL.get<
bool>(
"sa: enforce constraints");
169 const SC userDefinedMaxEigen = as<SC>(pL.get<
double>(
"sa: max eigenvalue"));
172 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints,Exceptions::RuntimeError,
173 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
175 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
179 if (Teuchos::ScalarTraits<SC>::real(userDefinedMaxEigen) == Teuchos::ScalarTraits<SC>::real(-1.0))
181 SubFactoryMonitor m2(*
this,
"Eigenvalue estimate", coarseLevel);
182 lambdaMax = A->GetMaxEigenvalueEstimate();
183 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
184 GetOStream(
Statistics1) <<
"Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations <<
185 ( (useAbsValueRowSum) ?
", use rowSumAbs diagonal)" :
", use point diagonal)") << std::endl;
186 Magnitude stopTol = 1e-4;
187 invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
188 if (useAbsValueRowSum)
189 lambdaMax = Utilities_kokkos::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
191 lambdaMax = Utilities_kokkos::PowerMethod(*A,
true, maxEigenIterations, stopTol);
192 A->SetMaxEigenvalueEstimate(lambdaMax);
194 GetOStream(
Statistics1) <<
"Using cached max eigenvalue estimate" << std::endl;
198 lambdaMax = userDefinedMaxEigen;
199 A->SetMaxEigenvalueEstimate(lambdaMax);
204 SubFactoryMonitor m2(*
this,
"Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
206 SubFactoryMonitor m3(*
this,
"Diagonal Extraction", coarseLevel);
207 if (useAbsValueRowSum)
208 GetOStream(
Runtime0) <<
"Using rowSumAbs diagonal" << std::endl;
209 if (invDiag == Teuchos::null)
210 invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A, Teuchos::ScalarTraits<SC>::eps()*100, useAbsValueRowSum);
213 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError,
"Prolongator damping factor needs to be finite.");
216 SubFactoryMonitor m3(*
this,
"Xpetra::IteratorOps::Jacobi", coarseLevel);
218 finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(
Statistics2), std::string(
"MueLu::SaP-") +
toString(coarseLevel.GetLevelID()), APparams);
219 if (enforceConstraints) SatisfyPConstraints( A, finalP);
228 if (!restrictionMode_) {
230 if(!finalP.is_null()) {std::ostringstream oss; oss <<
"P_" << coarseLevel.GetLevelID(); finalP->setObjectLabel(oss.str());}
231 Set(coarseLevel,
"P", finalP);
234 if (Ptent->IsView(
"stridedMaps"))
235 finalP->CreateView(
"stridedMaps", Ptent);
239 RCP<Matrix> R = Utilities_kokkos::Transpose(*finalP,
true);
240 Set(coarseLevel,
"R", R);
241 if(!R.is_null()) {std::ostringstream oss; oss <<
"R_" << coarseLevel.GetLevelID(); R->setObjectLabel(oss.str());}
244 if (Ptent->IsView(
"stridedMaps"))
245 R->CreateView(
"stridedMaps", Ptent,
true);
249 RCP<ParameterList> params = rcp(
new ParameterList());
250 params->set(
"printLoadBalancingInfo",
true);
251 params->set(
"printCommInfo",
true);
273 template<
typename local_matrix_type>
274 struct constraintKernel {
276 using Scalar=
typename local_matrix_type::non_const_value_type;
278 using LO=
typename local_matrix_type::non_const_ordinal_type;
279 using Device=
typename local_matrix_type::device_type;
280 const Scalar zero = Kokkos::ArithTraits<SC>::zero();
281 const Scalar one = Kokkos::ArithTraits<SC>::one();
283 local_matrix_type localP;
288 constraintKernel(LO nPDEs_,local_matrix_type localP_) : nPDEs(nPDEs_), localP(localP_)
295 KOKKOS_INLINE_FUNCTION
296 void operator() (
const size_t rowIdx)
const {
298 auto rowPtr = localP.graph.row_map;
299 auto values = localP.values;
301 bool checkRow =
true;
303 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow =
false;
310 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
311 Rsum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
312 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) < Kokkos::ArithTraits<SC>::real(zero)) {
314 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += values(entryIdx);
315 values(entryIdx) = zero;
318 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) != Kokkos::ArithTraits<SC>::real(zero))
319 nPositive(rowIdx, entryIdx%nPDEs) = nPositive(rowIdx, entryIdx%nPDEs) + 1;
321 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(1.00001 )) {
322 ConstraintViolationSum(rowIdx, entryIdx%nPDEs) += (values(entryIdx) - one);
323 values(entryIdx) = one;
332 for (
size_t k=0; k < (size_t) nPDEs; k++) {
334 if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) < Kokkos::ArithTraits<SC>::magnitude(zero)) {
335 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k);
337 else if (Kokkos::ArithTraits<SC>::real(Rsum(rowIdx, k)) > Kokkos::ArithTraits<SC>::magnitude(1.00001)) {
338 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k)+ (one - Rsum(rowIdx, k));
343 for (
size_t k=0; k < (size_t) nPDEs; k++) {
344 if (Kokkos::ArithTraits<SC>::magnitude(ConstraintViolationSum(rowIdx, k)) != Kokkos::ArithTraits<SC>::magnitude(zero))
349 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
350 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(zero)) {
351 values(entryIdx) = values(entryIdx) +
352 (ConstraintViolationSum(rowIdx, entryIdx%nPDEs)/ (
Scalar (nPositive(rowIdx, entryIdx%nPDEs)) != zero ?
Scalar (nPositive(rowIdx, entryIdx%nPDEs)) : one));
355 for (
size_t k=0; k < (size_t) nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
357 for (
size_t k=0; k < (size_t) nPDEs; k++) Rsum(rowIdx, k) = zero;
358 for (
size_t k=0; k < (size_t) nPDEs; k++) nPositive(rowIdx, k) = 0;
365 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
366 void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::SatisfyPConstraints(
const RCP<Matrix> A, RCP<Matrix>& P)
const {
368 using Device =
typename Matrix::local_matrix_type::device_type;
369 LO nPDEs = A->GetFixedBlockSize();
371 using local_mat_type =
typename Matrix::local_matrix_type;
372 constraintKernel<local_mat_type> myKernel(nPDEs,P->getLocalMatrixDevice() );
380 #endif // HAVE_MUELU_KOKKOS_REFACTOR 381 #endif // MUELU_SAPFACTORY_KOKKOS_DEF_HPP std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Print even more statistics.
Print statistics that do not involve significant additional computation.
MueLu::DefaultScalar Scalar
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Description of what is happening (more verbose)
#define SET_VALID_ENTRY(name)