MueLu  Version of the Day
MueLu_ClassicalPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_CLASSICALPFACTORY_DEF_HPP
47 #define MUELU_CLASSICALPFACTORY_DEF_HPP
48 
49 #include <Xpetra_MultiVectorFactory.hpp>
50 #include <Xpetra_VectorFactory.hpp>
51 #include <Xpetra_CrsGraphFactory.hpp>
52 #include <Xpetra_Matrix.hpp>
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_MapFactory.hpp>
56 #include <Xpetra_Vector.hpp>
57 #include <Xpetra_Import.hpp>
58 #include <Xpetra_ImportUtils.hpp>
59 #include <Xpetra_IO.hpp>
60 #include <Xpetra_StridedMapFactory.hpp>
61 
62 #include <Teuchos_OrdinalTraits.hpp>
63 
64 #include "MueLu_MasterList.hpp"
65 #include "MueLu_Monitor.hpp"
66 #include "MueLu_PerfUtils.hpp"
68 #include "MueLu_ClassicalMapFactory.hpp"
69 #include "MueLu_Utilities.hpp"
70 #include "MueLu_AmalgamationInfo.hpp"
71 #include "MueLu_GraphBase.hpp"
72 
73 
74 //#define CMS_DEBUG
75 //#define CMS_DUMP
76 
77 namespace {
78 
79 template<class SC>
80 int Sign(SC val) {
81  using STS = typename Teuchos::ScalarTraits<SC>;
82  typename STS::magnitudeType MT_ZERO = Teuchos::ScalarTraits<typename STS::magnitudeType>::zero();
83  if(STS::real(val) > MT_ZERO) return 1;
84  else if(STS::real(val) < MT_ZERO) return -1;
85  else return 0;
86 }
87 
88 }// anonymous namepsace
89 
90 namespace MueLu {
91 
92 
93 
94 
95  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
97  RCP<ParameterList> validParamList = rcp(new ParameterList());
98 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
99  SET_VALID_ENTRY("aggregation: deterministic");
100  SET_VALID_ENTRY("aggregation: coloring algorithm");
101  SET_VALID_ENTRY("aggregation: classical scheme");
102 
103  // To know if we need BlockNumber
104  SET_VALID_ENTRY("aggregation: drop scheme");
105  {
106  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
107  validParamList->getEntry("aggregation: classical scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("direct","ext+i","classical modified"), "aggregation: classical scheme")));
108 
109  }
110 
111 #undef SET_VALID_ENTRY
112  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
113  // validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
114  validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
115  validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
116  validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the CoarseMap");
117  validParamList->set< RCP<const FactoryBase> >("FC Splitting", Teuchos::null, "Generating factory of the FC Splitting");
118  validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for Block Number");
119  // validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
120 
121  return validParamList;
122  }
123 
124  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
126  Input(fineLevel, "A");
127  Input(fineLevel, "Graph");
128  Input(fineLevel, "DofsPerNode");
129  // Input(fineLevel, "UnAmalgamationInfo");
130  Input(fineLevel, "CoarseMap");
131  Input(fineLevel, "FC Splitting");
132 
133  const ParameterList& pL = GetParameterList();
134  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
135  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
136  Input(fineLevel, "BlockNumber");
137  }
138 
139  }
140 
141  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
143  return BuildP(fineLevel, coarseLevel);
144  }
145 
146  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
148  FactoryMonitor m(*this, "Build", coarseLevel);
149  using STS = Teuchos::ScalarTraits<SC>;
150 
151  // We start by assuming that someone did a reasonable strength of connection
152  // algorithm before we start to get our Graph, DofsPerNode and UnAmalgamationInfo
153 
154  // We begin by getting a MIS (from a graph coloring) and then at that point we need
155  // to start generating entries for the prolongator.
156  RCP<const Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
157  RCP<const Map> ownedCoarseMap = Get<RCP<const Map> >(fineLevel,"CoarseMap");
158  RCP<const LocalOrdinalVector> owned_fc_splitting = Get<RCP<LocalOrdinalVector> >(fineLevel,"FC Splitting");
159  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(fineLevel, "Graph");
160  // LO nDofsPerNode = Get<LO>(fineLevel, "DofsPerNode");
161  // RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
162  RCP<const Import> Importer = A->getCrsGraph()->getImporter();
163  Xpetra::UnderlyingLib lib = ownedCoarseMap->lib();
164 
165  // RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
166  RCP<Matrix> P;
167  // SC SC_ZERO = STS::zero();
168  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
171  const ParameterList& pL = GetParameterList();
172 
173  // FIXME: This guy doesn't work right now for NumPDEs != 1
174  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError,"ClassicalPFactory: Multiple PDEs per node not supported yet");
175 
176  // NOTE: Let's hope we never need to deal with this case
177  TEUCHOS_TEST_FOR_EXCEPTION(!A->getRowMap()->isSameAs(*A->getDomainMap()),Exceptions::RuntimeError,"ClassicalPFactory: MPI Ranks > 1 not supported yet");
178 
179  // Do we need ghosts rows of A and myPointType?
180  std::string scheme = pL.get<std::string>("aggregation: classical scheme");
181  bool need_ghost_rows =false;
182  if(scheme == "ext+i")
183  need_ghost_rows=true;
184  else if(scheme == "direct")
185  need_ghost_rows=false;
186  else if(scheme == "classical modified")
187  need_ghost_rows=true;
188  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
189 
190  if (GetVerbLevel() & Statistics1) {
191  GetOStream(Statistics1) << "ClassicalPFactory: scheme = "<<scheme<<std::endl;
192  }
193 
194  // Ghost the FC splitting and grab the data (if needed)
195  RCP<const LocalOrdinalVector> fc_splitting;
196  ArrayRCP<const LO> myPointType;
197  if(Importer.is_null()) {
198  fc_splitting = owned_fc_splitting;
199  }
200  else {
201  RCP<LocalOrdinalVector> fc_splitting_nonconst = LocalOrdinalVectorFactory::Build(A->getCrsGraph()->getColMap());
202  fc_splitting_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
203  fc_splitting = fc_splitting_nonconst;
204  }
205  myPointType = fc_splitting->getData(0);
206 
207 
208  /* Ghost A (if needed) */
209  RCP<const Matrix> Aghost;
210  RCP<const LocalOrdinalVector> fc_splitting_ghost;
211  ArrayRCP<const LO> myPointType_ghost;
212  RCP<const Import> remoteOnlyImporter;
213  if(need_ghost_rows && !Importer.is_null()){
214  ArrayView<const LO> remoteLIDs = Importer->getRemoteLIDs();
215  size_t numRemote = Importer->getNumRemoteIDs();
216  Array<GO> remoteRows(numRemote);
217  for (size_t i = 0; i < numRemote; i++)
218  remoteRows[i] = Importer->getTargetMap()->getGlobalElement(remoteLIDs[i]);
219 
220  RCP<const Map> remoteRowMap = MapFactory::Build(lib,Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), remoteRows(),
221  A->getDomainMap()->getIndexBase(), A->getDomainMap()->getComm());
222 
223  remoteOnlyImporter = Importer->createRemoteOnlyImport(remoteRowMap);
224  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
225  RCP<CrsMatrix> Aghost_crs = CrsMatrixFactory::Build(Acrs,*remoteOnlyImporter,A->getDomainMap(),remoteOnlyImporter->getTargetMap());
226  Aghost = rcp(new CrsMatrixWrap(Aghost_crs));
227  // CAG: It seems that this isn't actually needed?
228  // We also may need need to ghost myPointType for Aghost
229  // RCP<const Import> Importer2 = Aghost->getCrsGraph()->getImporter();
230  // if(Importer2.is_null()) {
231  // RCP<LocalOrdinalVector> fc_splitting_ghost_nonconst = LocalOrdinalVectorFactory::Build(Aghost->getColMap());
232  // fc_splitting_ghost_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
233  // fc_splitting_ghost = fc_splitting_ghost_nonconst;
234  // myPointType_ghost = fc_splitting_ghost->getData(0);
235  // }
236  }
237 
238  /* Generate the ghosted Coarse map using the "Tuminaro maneuver" (if needed)*/
239  RCP<const Map> coarseMap;
240  if(Importer.is_null())
241  coarseMap = ownedCoarseMap;
242  else {
243  // Generate a domain vector with the coarse ID's as entries for C points
244  GhostCoarseMap(*A,*Importer,myPointType,ownedCoarseMap,coarseMap);
245  }
246 
247  /* Get the block number, if we need it (and ghost it) */
248  RCP<LocalOrdinalVector> BlockNumber;
249  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
250  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
251  RCP<LocalOrdinalVector> OwnedBlockNumber;
252  OwnedBlockNumber = Get<RCP<LocalOrdinalVector> >(fineLevel, "BlockNumber");
253  if(Importer.is_null())
254  BlockNumber = OwnedBlockNumber;
255  else{
256  BlockNumber = LocalOrdinalVectorFactory::Build(A->getColMap());
257  BlockNumber->doImport(*OwnedBlockNumber,*Importer,Xpetra::INSERT);
258  }
259  }
260 
261 #if defined(CMS_DEBUG) || defined(CMS_DUMP)
262  {
263  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
264  int rank = A->getRowMap()->getComm()->getRank();
265  printf("[%d] A local size = %dx%d\n",rank,(int)Acrs->getRowMap()->getNodeNumElements(),(int)Acrs->getColMap()->getNodeNumElements());
266 
267  printf("[%d] graph local size = %dx%d\n",rank,(int)graph->GetDomainMap()->getNodeNumElements(),(int)graph->GetImportMap()->getNodeNumElements());
268 
269  std::ofstream ofs(std::string("dropped_graph_") + std::to_string(fineLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
270  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
271  graph->print(*fancy,Debug);
272  std::string out_fc = std::string("fc_splitting_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
273  std::string out_block = std::string("block_number_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
274 
275  // We don't support writing LO vectors in Xpetra (boo!) so....
276  using real_type = typename Teuchos::ScalarTraits<SC>::magnitudeType;
277  using RealValuedMultiVector = typename Xpetra::MultiVector<real_type,LO,GO,NO>;
278  typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
279 
280  RCP<RealValuedMultiVector> mv = RealValuedMultiVectorFactory::Build(fc_splitting->getMap(),1);
281  ArrayRCP<real_type> mv_data= mv->getDataNonConst(0);
282 
283  // FC Splitting
284  ArrayRCP<const LO> fc_data= fc_splitting->getData(0);
285  for(LO i=0; i<(LO)fc_data.size(); i++)
286  mv_data[i] = Teuchos::as<real_type>(fc_data[i]);
287  Xpetra::IO<real_type,LO,GO,NO>::Write(out_fc,*mv);
288 
289  // Block Number
290  if(!BlockNumber.is_null()) {
291  RCP<RealValuedMultiVector> mv2 = RealValuedMultiVectorFactory::Build(BlockNumber->getMap(),1);
292  ArrayRCP<real_type> mv_data2= mv2->getDataNonConst(0);
293  ArrayRCP<const LO> b_data= BlockNumber->getData(0);
294  for(LO i=0; i<(LO)b_data.size(); i++) {
295  mv_data2[i] = Teuchos::as<real_type>(b_data[i]);
296  }
297  Xpetra::IO<real_type,LO,GO,NO>::Write(out_block,*mv2);
298  }
299  }
300 
301 
302 #endif
303 
304 
305  /* Generate reindexing arrays */
306  // Note: cpoint2pcol is ghosted if myPointType is
307  // NOTE: Since the ghosted coarse column map follows the ordering of
308  // the fine column map, this *should* work, because it is in local indices.
309  // pcol2cpoint - Takes a LCID for P and turns in into an LCID for A.
310  // cpoint2pcol - Takes a LCID for A --- if it is a C Point --- and turns in into an LCID for P.
311  Array<LO> cpoint2pcol(myPointType.size(),LO_INVALID);
312  Array<LO> pcol2cpoint(coarseMap->getNodeNumElements(),LO_INVALID);
313  LO num_c_points = 0;
314  LO num_f_points =0;
315  for(LO i=0; i<(LO) myPointType.size(); i++) {
316  if(myPointType[i] == C_PT) {
317  cpoint2pcol[i] = num_c_points;
318  num_c_points++;
319  }
320  else if (myPointType[i] == F_PT)
321  num_f_points++;
322  }
323  for(LO i=0; i<(LO)cpoint2pcol.size(); i++) {
324  if(cpoint2pcol[i] != LO_INVALID)
325  pcol2cpoint[cpoint2pcol[i]] =i;
326  }
327 
328  // Generate edge strength flags (this will make everything easier later)
329  // These do *not* need to be ghosted (unlike A)
330  Teuchos::Array<size_t> eis_rowptr;
331  Teuchos::Array<bool> edgeIsStrong;
332  {
333  SubFactoryMonitor sfm(*this,"Strength Flags",coarseLevel);
334  GenerateStrengthFlags(*A,*graph,eis_rowptr,edgeIsStrong);
335  }
336 
337  // Phase 3: Generate the P matrix
338  RCP<const Map> coarseColMap = coarseMap;
339  RCP<const Map> coarseDomainMap = ownedCoarseMap;
340  if(scheme == "ext+i") {
341  SubFactoryMonitor sfm(*this,"Ext+i Interpolation",coarseLevel);
342  Coarsen_Ext_Plus_I(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
343  }
344  else if(scheme == "direct") {
345  SubFactoryMonitor sfm(*this,"Direct Interpolation",coarseLevel);
346  Coarsen_Direct(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
347  }
348  else if(scheme == "classical modified") {
349  SubFactoryMonitor sfm(*this,"Classical Modified Interpolation",coarseLevel);
350  Coarsen_ClassicalModified(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,remoteOnlyImporter,P);
351  }
352  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
353 
354 #ifdef CMS_DEBUG
355  Xpetra::IO<SC,LO,GO,NO>::Write("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
356  // Xpetra::IO<SC,LO,GO,NO>::WriteLocal("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
357 #endif
358 
359  // Save output
360  Set(coarseLevel,"P",P);
361  RCP<const CrsGraph> pg = P->getCrsGraph();
362  Set(coarseLevel,"P Graph",pg);
363 
364  //RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
365  // P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
366  // Set(coarseLevel, "Nullspace", coarseNullspace);
367 
368  if (IsPrint(Statistics1)) {
369  RCP<ParameterList> params = rcp(new ParameterList());
370  params->set("printLoadBalancingInfo", true);
371  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
372  }
373  }
374 /* ************************************************************************* */
375 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
377 Coarsen_ClassicalModified(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<const Import> remoteOnlyImporter,RCP<Matrix> & P) const {
378  /* ============================================================= */
379  /* Phase 3 : Classical Modified Interpolation */
380  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
381  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
382  /* 15:115-139 */
383  /* ============================================================= */
384  /* Definitions: */
385  /* F = F-points */
386  /* C = C-points */
387  /* N_i = non-zero neighbors of node i */
388  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
389  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
390  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
391 
392  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
393  /* This guy has a typo. The paper had a \cap instead of \cup */
394  /* I would note that this set can contain both F-points and */
395  /* C-points. They're just weak neighbors of this guy. */
396  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
397 
398 
399  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
400  /* { a_ij, otherwise */
401 
402  /* F_i^s\star = {k\in N_i | C_i^s \cap C_k^s = \emptyset} */
403  /* [set of F-neighbors of i that do not share a strong */
404  /* C-neighbor with i] */
405 
406 
407  /* Rewritten Equation (9) on p. 120 */
408  /* \tilde{a}_ii = (a_ij + \sum_{k\in{N_i^w \cup F_i^s\star}} a_ik */
409  /* */
410  /* f_ij = \sum_{k\in{F_i^s\setminusF_i^s*}} \frac{a_ik \bar{a}_kj}{\sum_{m\inC_i^s \bar{a}_km}} */
411  /* */
412  /* w_ij = \frac{1}{\tilde{a}_ii} ( a_ij + f_ij) for all j in C_i^s */
413 
414  // const point_type F_PT = ClassicalMapFactory::F_PT;
416  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
417  using STS = typename Teuchos::ScalarTraits<SC>;
418  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
419  // size_t ST_INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
420  SC SC_ZERO = STS::zero();
421 #ifdef CMS_DEBUG
422  int rank = A.getRowMap()->getComm()->getRank();
423 #endif
424 
425  // Get the block number if we have it.
426  ArrayRCP<const LO> block_id;
427  if(!BlockNumber.is_null())
428  block_id = BlockNumber->getData(0);
429 
430  // Initial (estimated) allocation
431  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
432  // needs to be a copy below.
433  size_t Nrows = A.getNodeNumRows();
434  double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
435  double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
436  // double mean_neighbors_per_row = (double)A.getNodeNumEntries() / Nrows;
437  double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
438 
439  size_t nnz_est = std::max(Nrows,std::min((size_t)A.getNodeNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
440  Array<size_t> tmp_rowptr(Nrows+1);
441  Array<LO> tmp_colind(nnz_est);
442 
443  // Algorithm (count+realloc)
444  // For each row, i,
445  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
446  size_t ct=0;
447  for(LO row=0; row < (LO) Nrows; row++) {
448  size_t row_start = eis_rowptr[row];
449  ArrayView<const LO> indices;
450  ArrayView<const SC> vals;
451  std::set<LO> C_hat;
452  if(myPointType[row] == DIRICHLET_PT) {
453  // Dirichlet points get ignored completely
454  }
455  else if(myPointType[row] == C_PT) {
456  // C-Points get a single 1 in their row
457  C_hat.insert(cpoint2pcol[row]);
458  }
459  else {
460  // F-Points have a more complicated interpolation
461 
462  // C-neighbors of row
463  A.getLocalRowView(row, indices, vals);
464  for(LO j=0; j<indices.size(); j++)
465  if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
466  C_hat.insert(cpoint2pcol[indices[j]]);
467  }// end else
468 
469  // Realloc if needed
470  if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
471  tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
472  }
473 
474  // Copy
475  std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
476  ct+=C_hat.size();
477  tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
478  }
479  // Resize down
480  tmp_colind.resize(tmp_rowptr[Nrows]);
481 
482  RCP<CrsMatrix> Pcrs = CrsMatrixFactory::Build(A.getRowMap(),coarseColMap,0);
483  ArrayRCP<size_t> P_rowptr;
484  ArrayRCP<LO> P_colind;
485  ArrayRCP<SC> P_values;
486 
487  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
488  P_rowptr = Teuchos::arcpFromArray(tmp_rowptr);
489  P_colind = Teuchos::arcpFromArray(tmp_colind);
490  P_values.resize(P_rowptr[Nrows]);
491  } else {
492  // Make the matrix and then get the graph out of it (necessary for Epetra)
493  // NOTE: The lack of an Epetra_CrsGraph::ExpertStaticFillComplete makes this
494  // impossible to do the obvious way
495  Pcrs->allocateAllValues(tmp_rowptr[Nrows],P_rowptr,P_colind,P_values);
496  std::copy(tmp_rowptr.begin(),tmp_rowptr.end(), P_rowptr.begin());
497  std::copy(tmp_colind.begin(),tmp_colind.end(), P_colind.begin());
498  Pcrs->setAllValues(P_rowptr,P_colind,P_values);
499  Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
500  }
501 
502  // Generate a remote-ghosted version of the graph (if we're in parallel)
503  RCP<const CrsGraph> Pgraph;
504  RCP<const CrsGraph> Pghost;
505  // TODO: We might want to be more efficient here and actually use
506  // Pgraph in the matrix constructor.
507  ArrayRCP<const size_t> Pghost_rowptr;
508  ArrayRCP<const LO> Pghost_colind;
509  if(!remoteOnlyImporter.is_null()) {
510  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
511  RCP<CrsGraph> tempPgraph = CrsGraphFactory::Build(A.getRowMap(),coarseColMap,P_rowptr,P_colind);
512  tempPgraph->fillComplete(coarseDomainMap,A.getDomainMap());
513  Pgraph = tempPgraph;
514  } else {
515  // Epetra does not have a graph constructor that uses rowptr and colind.
516  Pgraph = Pcrs->getCrsGraph();
517  }
518  TEUCHOS_ASSERT(!Pgraph.is_null());
519  Pghost = CrsGraphFactory::Build(Pgraph,*remoteOnlyImporter,Pgraph->getDomainMap(),remoteOnlyImporter->getTargetMap());
520  Pghost->getAllIndices(Pghost_rowptr,Pghost_colind);
521  }
522 
523  // Gustavson-style perfect hashing
524  ArrayRCP<LO> Acol_to_Pcol(A.getColMap()->getNodeNumElements(),LO_INVALID);
525 
526  // Get a quick reindexing array from Pghost LCIDs to P LCIDs
527  ArrayRCP<LO> Pghostcol_to_Pcol;
528  if(!Pghost.is_null()) {
529  Pghostcol_to_Pcol.resize(Pghost->getColMap()->getNodeNumElements(),LO_INVALID);
530  for(LO i=0; i<(LO) Pghost->getColMap()->getNodeNumElements(); i++)
531  Pghostcol_to_Pcol[i] = Pgraph->getColMap()->getLocalElement(Pghost->getColMap()->getGlobalElement(i));
532  }//end Pghost
533 
534  // Get a quick reindexing array from Aghost LCIDs to A LCIDs
535  ArrayRCP<LO> Aghostcol_to_Acol;
536  if(!Aghost.is_null()) {
537  Aghostcol_to_Acol.resize(Aghost->getColMap()->getNodeNumElements(),LO_INVALID);
538  for(LO i=0; i<(LO)Aghost->getColMap()->getNodeNumElements(); i++)
539  Aghostcol_to_Acol[i] = A.getColMap()->getLocalElement(Aghost->getColMap()->getGlobalElement(i));
540  }//end Aghost
541 
542 
543  // Algorithm (numeric)
544  for(LO i=0; i < (LO)Nrows; i++) {
545  if(myPointType[i] == DIRICHLET_PT) {
546  // Dirichlet points get ignored completely
547 #ifdef CMS_DEBUG
548  // DEBUG
549  printf("[%d] ** A(%d,:) is a Dirichlet-Point.\n",rank,i);
550 #endif
551  }
552  else if (myPointType[i] == C_PT) {
553  // C Points get a single 1 in their row
554  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
555 #ifdef CMS_DEBUG
556  // DEBUG
557  printf("[%d] ** A(%d,:) is a C-Point.\n",rank,i);
558 #endif
559  }
560  else {
561  // F Points get all of the fancy stuff
562 #ifdef CMS_DEBUG
563  // DEBUG
564  printf("[%d] ** A(%d,:) is a F-Point.\n",rank,i);
565 #endif
566 
567  // Get all of the relevant information about this row
568  ArrayView<const LO> A_indices_i, A_indices_k;
569  ArrayView<const SC> A_vals_i, A_vals_k;
570  A.getLocalRowView(i, A_indices_i, A_vals_i);
571  size_t row_start = eis_rowptr[i];
572 
573  ArrayView<const LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
574  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
575 
576  // FIXME: Do we need this?
577  for(LO j=0; j<(LO)P_vals_i.size(); j++)
578  P_vals_i[j] = SC_ZERO;
579 
580  // Stash the hash: Flag any strong C-points with their index into P_colind
581  // NOTE: We'll consider any points that are LO_INVALID or less than P_rowptr[i] as not strong C-points
582  for(LO j=0; j<(LO)P_indices_i.size(); j++) {
583  Acol_to_Pcol[pcol2cpoint[P_indices_i[j]]] = P_rowptr[i] + j;
584  }
585 
586  // Loop over the entries in the row
587  SC first_denominator = SC_ZERO;
588 #ifdef CMS_DEBUG
589  SC a_ii = SC_ZERO;
590 #endif
591  for(LO k0=0; k0<(LO)A_indices_i.size(); k0++) {
592  LO k = A_indices_i[k0];
593  SC a_ik = A_vals_i[k0];
594  LO pcol_k = Acol_to_Pcol[k];
595 
596  if(k == i) {
597  // Case A: Diagonal value (add to first denominator)
598  // FIXME: Add BlockNumber matching here
599  first_denominator += a_ik;
600 #ifdef CMS_DEBUG
601  a_ii = a_ik;
602  printf("- A(%d,%d) is the diagonal\n",i,k);
603 #endif
604 
605  }
606  else if(myPointType[k] == DIRICHLET_PT) {
607  // Case B: Ignore dirichlet connections completely
608 #ifdef CMS_DEBUG
609  printf("- A(%d,%d) is a Dirichlet point\n",i,k);
610 #endif
611 
612  }
613  else if(pcol_k != LO_INVALID && pcol_k >= (LO)P_rowptr[i]) {
614  // Case C: a_ik is strong C-Point (goes directly into the weight)
615  P_values[pcol_k] += a_ik;
616 #ifdef CMS_DEBUG
617  printf("- A(%d,%d) is a strong C-Point\n",i,k);
618 #endif
619  }
620  else if (!edgeIsStrong[row_start + k0]) {
621  // Case D: Weak non-Dirichlet neighbor (add to first denominator)
622  if(block_id.size() == 0 || block_id[i] == block_id[k]) {
623  first_denominator += a_ik;
624 #ifdef CMS_DEBUG
625  printf("- A(%d,%d) is weak adding to diagonal(%d,%d) (%6.4e)\n",i,k,block_id[i],block_id[k],a_ik);
626  }
627  else {
628  printf("- A(%d,%d) is weak but does not match blocks (%d,%d), discarding\n",i,k,block_id[i],block_id[k]);
629 #endif
630  }
631 
632  }
633  else {//Case E
634  // Case E: Strong F-Point (adds to the first denominator if we don't share a
635  // a strong C-Point with i; adds to the second denominator otherwise)
636 #ifdef CMS_DEBUG
637  printf("- A(%d,%d) is a strong F-Point\n",i,k);
638 #endif
639 
640  SC a_kk = SC_ZERO;
641  SC second_denominator = SC_ZERO;
642  int sign_akk = 0;
643 
644  if(k < (LO)Nrows) {
645  // Grab the diagonal a_kk
646  A.getLocalRowView(k, A_indices_k, A_vals_k);
647  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
648  LO m = A_indices_k[m0];
649  if(k == m) {
650  a_kk = A_vals_k[m0];
651  break;
652  }
653  }//end for A_indices_k
654 
655  // Compute the second denominator term
656  sign_akk = Sign(a_kk);
657  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
658  LO m = A_indices_k[m0];
659  if(m != k && Acol_to_Pcol[A_indices_k[m0]] >= (LO)P_rowptr[i]) {
660  SC a_km = A_vals_k[m0];
661  second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
662  }
663  }//end for A_indices_k
664 
665  // Now we have the second denominator, for this particular strong F point.
666  // So we can now add the sum to the w_ij components for the P values
667  if(second_denominator != SC_ZERO) {
668  for(LO j0=0; j0<(LO)A_indices_k.size(); j0++) {
669  LO j = A_indices_k[j0];
670  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
671  // printf("Acol_to_Pcol[%d] = %d P_values.size() = %d\n",j,Acol_to_Pcol[j],(int)P_values.size());
672  if(Acol_to_Pcol[j] >= (LO)P_rowptr[i]) {
673  SC a_kj = A_vals_k[j0];
674  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
675  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
676 #ifdef CMS_DEBUG
677  printf("- - Unscaled P(%d,A-%d) += %6.4e = %5.4e\n",i,j,a_ik * sign_akj_val / second_denominator,P_values[Acol_to_Pcol[j]]);
678 #endif
679  }
680  }//end for A_indices_k
681  }//end if second_denominator != 0
682  else {
683 #ifdef CMS_DEBUG
684  printf("- - A(%d,%d) second denominator is zero\n",i,k);
685 #endif
686  if(block_id.size() == 0 || block_id[i] == block_id[k])
687  first_denominator += a_ik;
688  }//end else second_denominator != 0
689  }// end if k < Nrows
690  else {
691  // Ghost row
692  LO kless = k-Nrows;
693  // Grab the diagonal a_kk
694  // NOTE: ColMap is not locally fitted to the RowMap
695  // so we need to check GIDs here
696  Aghost->getLocalRowView(kless, A_indices_k, A_vals_k);
697  GO k_g = Aghost->getRowMap()->getGlobalElement(kless);
698  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
699  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
700  if(k_g == m_g) {
701  a_kk = A_vals_k[m0];
702  break;
703  }
704  }//end for A_indices_k
705 
706  // Compute the second denominator term
707  sign_akk = Sign(a_kk);
708  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
709  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
710  LO mghost = A_indices_k[m0];//Aghost LCID
711  LO m = Aghostcol_to_Acol[mghost]; //A's LID (could be LO_INVALID)
712  if(m_g != k_g && m != LO_INVALID && Acol_to_Pcol[m] >= (LO)P_rowptr[i]) {
713  SC a_km = A_vals_k[m0];
714  second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
715  }
716  }//end for A_indices_k
717 
718 
719  // Now we have the second denominator, for this particular strong F point.
720  // So we can now add the sum to the w_ij components for the P values
721  if(second_denominator != SC_ZERO) {
722  for(LO j0=0; j0<(LO)A_indices_k.size(); j0++){
723  LO jghost = A_indices_k[j0];//Aghost LCID
724  LO j = Aghostcol_to_Acol[jghost]; //A's LID (could be LO_INVALID)
725  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
726  if((j != LO_INVALID) && (Acol_to_Pcol[j] >= (LO)P_rowptr[i])) {
727  SC a_kj = A_vals_k[j0];
728  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
729  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
730 #ifdef CMS_DEBUG
731  printf("- - Unscaled P(%d,A-%d) += %6.4e\n",i,j,a_ik * sign_akj_val / second_denominator);
732 #endif
733  }
734 
735  }//end for A_indices_k
736  }//end if second_denominator != 0
737  else {
738 #ifdef CMS_DEBUG
739  printf("- - A(%d,%d) second denominator is zero\n",i,k);
740 #endif
741  if(block_id.size() == 0 || block_id[i] == block_id[k])
742  first_denominator += a_ik;
743  }//end else second_denominator != 0
744  }//end else k < Nrows
745  }//end else Case A,...,E
746 
747  }//end for A_indices_i
748 
749  // Now, downscale by the first_denominator
750  if(first_denominator != SC_ZERO) {
751  for(LO j0=0; j0<(LO)P_indices_i.size(); j0++) {
752 #ifdef CMS_DEBUG
753  SC old_pij = P_vals_i[j0];
754  P_vals_i[j0] /= -first_denominator;
755  printf("P(%d,%d) = %6.4e = %6.4e / (%6.4e + %6.4e)\n",i,P_indices_i[j0],P_vals_i[j0],old_pij,a_ii,first_denominator - a_ii);
756 #else
757  P_vals_i[j0] /= -first_denominator;
758 #endif
759  }//end for P_indices_i
760  }//end if first_denominator != 0
761 
762  }//end else C-Point
763 
764  }// end if i < Nrows
765 
766  // Finish up
767  Pcrs->setAllValues(P_rowptr,P_colind,P_values);
768  Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
769  // Wrap from CrsMatrix to Matrix
770  P = rcp(new CrsMatrixWrap(Pcrs));
771 
772 }//end Coarsen_ClassicalModified
773 
774 
775 /* ************************************************************************* */
776 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
778 Coarsen_Direct(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
779  /* ============================================================= */
780  /* Phase 3 : Direct Interpolation */
781  /* We do not use De Sterck, Falgout, Nolting and Yang (2008) */
782  /* here. Instead we follow: */
783  /* Trottenberg, Oosterlee and Schueller, Multigrid, 2001. */
784  /* with some modifications inspirted by PyAMG */
785  /* ============================================================= */
786  /* Definitions: */
787  /* F = F-points */
788  /* C = C-points */
789  /* N_i = non-zero neighbors of node i */
790  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
791  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
792  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
793  /* P_i = Set of interpolatory variables for row i [here = C_i^s] */
794 
795  /* (A.2.17) from p. 426 */
796  /* a_ij^- = { a_ij, if a_ij < 0 */
797  /* { 0, otherwise */
798  /* a_ij^+ = { a_ij, if a_ij > 0 */
799  /* { 0, otherwise */
800  /* P_i^- = P_i \cap {k | a_ij^- != 0 and a_ij^- = a_ij} */
801  /* [strong C-neighbors with negative edges] */
802  /* P_i^+ = P_i \cap {k | a_ij^+ != 0 and a_ij^+ = a_ij} */
803  /* [strong C-neighbors with positive edges] */
804 
805 
806  /* de Sterck et al., gives us this: */
807  /* Rewritten Equation (6) on p. 119 */
808  /* w_ij = - a_ji / a_ii \frac{\sum_{k\in N_i} a_ik} {\sum k\inC_i^s} a_ik}, j\in C_i^s */
809 
810  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
811  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
812  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
813  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
814  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
815  /* NOTE: The text says to modify, if P_i^+ is zero but it isn't entirely clear how that */
816  /* works. We'll follow the PyAMG implementation in a few important ways. */
817 
819  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
820 
821  // Initial (estimated) allocation
822  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
823  // needs to be a copy below.
824  using STS = typename Teuchos::ScalarTraits<SC>;
825  using MT = typename STS::magnitudeType;
826  using MTS = typename Teuchos::ScalarTraits<MT>;
827  size_t Nrows = A.getNodeNumRows();
828  double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
829  double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
830  // double mean_neighbors_per_row = (double)A.getNodeNumEntries() / Nrows;
831  double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
832 
833  size_t nnz_est = std::max(Nrows,std::min((size_t)A.getNodeNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
834  SC SC_ZERO = STS::zero();
835  MT MT_ZERO = MTS::zero();
836  Array<size_t> tmp_rowptr(Nrows+1);
837  Array<LO> tmp_colind(nnz_est);
838 
839  // Algorithm (count+realloc)
840  // For each row, i,
841  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
842  size_t ct=0;
843  for(LO row=0; row < (LO) Nrows; row++) {
844  size_t row_start = eis_rowptr[row];
845  ArrayView<const LO> indices;
846  ArrayView<const SC> vals;
847  std::set<LO> C_hat;
848  if(myPointType[row] == DIRICHLET_PT) {
849  // Dirichlet points get ignored completely
850  }
851  else if(myPointType[row] == C_PT) {
852  // C-Points get a single 1 in their row
853  C_hat.insert(cpoint2pcol[row]);
854  }
855  else {
856  // F-Points have a more complicated interpolation
857 
858  // C-neighbors of row
859  A.getLocalRowView(row, indices, vals);
860  for(LO j=0; j<indices.size(); j++)
861  if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
862  C_hat.insert(cpoint2pcol[indices[j]]);
863  }// end else
864 
865  // Realloc if needed
866  if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
867  tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
868  }
869 
870  // Copy
871  std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
872  ct+=C_hat.size();
873  tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
874  }
875  // Resize down
876  tmp_colind.resize(tmp_rowptr[Nrows]);
877 
878  // Allocate memory & copy
879  P = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
880  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
881  ArrayRCP<size_t> P_rowptr;
882  ArrayRCP<LO> P_colind;
883  ArrayRCP<SC> P_values;
884 
885 #ifdef CMS_DEBUG
886 printf("CMS: Allocating P w/ %d nonzeros\n",(int)tmp_rowptr[Nrows]);
887 #endif
888  PCrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
889  TEUCHOS_TEST_FOR_EXCEPTION(tmp_rowptr.size() !=P_rowptr.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (rowptr)");
890  TEUCHOS_TEST_FOR_EXCEPTION(tmp_colind.size() !=P_colind.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (colind)");
891  // FIXME: This can be short-circuited for Tpetra, if we decide we care
892  for(LO i=0; i<(LO)Nrows+1; i++)
893  P_rowptr[i] = tmp_rowptr[i];
894  for(LO i=0; i<(LO)tmp_rowptr[Nrows]; i++)
895  P_colind[i] = tmp_colind[i];
896 
897 
898  // Algorithm (numeric)
899  for(LO i=0; i < (LO)Nrows; i++) {
900  if(myPointType[i] == DIRICHLET_PT) {
901  // Dirichlet points get ignored completely
902 #ifdef CMS_DEBUG
903  // DEBUG
904  printf("** A(%d,:) is a Dirichlet-Point.\n",i);
905 #endif
906  }
907  else if (myPointType[i] == C_PT) {
908  // C Points get a single 1 in their row
909  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
910 #ifdef CMS_DEBUG
911  // DEBUG
912  printf("** A(%d,:) is a C-Point.\n",i);
913 #endif
914  }
915  else {
916  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
917  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
918  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
919  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
920  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
921  ArrayView<const LO> A_indices_i, A_incides_k;
922  ArrayView<const SC> A_vals_i, A_indices_k;
923  A.getLocalRowView(i, A_indices_i, A_vals_i);
924  size_t row_start = eis_rowptr[i];
925 
926  ArrayView<LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
927  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
928 
929 #ifdef CMS_DEBUG
930  // DEBUG
931  {
932  char mylabel[5]="FUCD";
933  char sw[3]="ws";
934  printf("** A(%d,:) = ",i);
935  for(LO j=0; j<(LO)A_indices_i.size(); j++){
936  printf("%6.4e(%d-%c%c) ",A_vals_i[j],A_indices_i[j],mylabel[1+myPointType[A_indices_i[j]]],sw[(int)edgeIsStrong[row_start+j]]);
937  }
938  printf("\n");
939  }
940 #endif
941 
942  SC a_ii = SC_ZERO;
943  SC pos_numerator = SC_ZERO, neg_numerator = SC_ZERO;
944  SC pos_denominator = SC_ZERO, neg_denominator = SC_ZERO;
945  // Find the diagonal and compute the sum ratio
946  for(LO j=0; j<(LO)A_indices_i.size(); j++) {
947  SC a_ik = A_vals_i[j];
948  LO k = A_indices_i[j];
949 
950  // Diagonal
951  if(i == k) {
952  a_ii = a_ik;
953  }
954  // Only strong C-neighbors are in the denomintor
955  if(myPointType[k] == C_PT && edgeIsStrong[row_start + j]) {
956  if(STS::real(a_ik) > MT_ZERO) pos_denominator += a_ik;
957  else neg_denominator += a_ik;
958  }
959 
960  // All neighbors are in the numerator
961  // NOTE: As per PyAMG, this does not include the diagonal
962  if(i != k) {
963  if(STS::real(a_ik) > MT_ZERO) pos_numerator += a_ik;
964  else neg_numerator += a_ik;
965  }
966  }
967  SC alpha = (neg_denominator == MT_ZERO) ? SC_ZERO : (neg_numerator / neg_denominator);
968  SC beta = (pos_denominator == MT_ZERO) ? SC_ZERO : (pos_numerator / pos_denominator);
969  alpha /= -a_ii;
970  beta /= -a_ii;
971 
972  // Loop over the entries
973  for(LO p_j=0; p_j<(LO)P_indices_i.size(); p_j++){
974  LO P_col = pcol2cpoint[P_indices_i[p_j]];
975  SC a_ij = SC_ZERO;
976 
977  // Find A_ij (if it is there)
978  // FIXME: We can optimize this if we assume sorting
979  for(LO a_j =0; a_j<(LO)A_indices_i.size(); a_j++) {
980  if(A_indices_i[a_j] == P_col) {
981  a_ij = A_vals_i[a_j];
982  break;
983  }
984  }
985  SC w_ij = (STS::real(a_ij) < 0 ) ? (alpha * a_ij) : (beta * a_ij);
986 #ifdef CMS_DEBUG
987  SC alpha_or_beta = (STS::real(a_ij) < 0 ) ? alpha : beta;
988  printf("P(%d,%d/%d) = - %6.4e * %6.4e = %6.4e\n",i,P_indices_i[p_j],pcol2cpoint[P_indices_i[p_j]],alpha_or_beta,a_ij,w_ij);
989 #endif
990  P_vals_i[p_j] = w_ij;
991  }//end for A_indices_i
992  }//end else C_PT
993  }//end for Numrows
994 
995  // Finish up
996  PCrs->setAllValues(P_rowptr, P_colind, P_values);
997  PCrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
998 }
999 
1000 
1001 /* ************************************************************************* */
1002 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1004 Coarsen_Ext_Plus_I(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
1005 
1006  /* ============================================================= */
1007  /* Phase 3 : Extended+i Interpolation */
1008  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
1009  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
1010  /* 15:115-139 */
1011  /* ============================================================= */
1012  /* Definitions: */
1013  /* F = F-points */
1014  /* C = C-points */
1015  /* N_i = non-zero neighbors of node i */
1016  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
1017  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
1018  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
1019  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
1020  /* This guy has a typo. The paper had a \cap instead of \cup */
1021  /* I would note that this set can contain both F-points and */
1022  /* C-points. They're just weak neighbors of this guy. */
1023  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
1024 
1025  /* \hat{C}_i = C_i \cup (\bigcup_{j\inF_i^s} C_j) */
1026  /* [C-neighbors and C-neighbors of strong F-neighbors of i] */
1027  /* */
1028 
1029  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
1030  /* { a_ij, otherwise */
1031 
1032 
1033  /* Rewritten Equation (19) on p. 123 */
1034  /* f_ik = \frac{\bar{a}_kj}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1035  /* w_ij = -\tilde{a}_ii^{-1} (a_ij + \sum_{k\inF_i^s} a_ik f_ik */
1036  /* for j in \hat{C}_i */
1037 
1038  /* Rewritten Equation (20) on p. 124 [for the lumped diagonal] */
1039  /* g_ik = \frac{\bar{a}_ki}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1040  /* \tilde{a}_ii = a_ii + \sum_{n\inN_i^w\setminus \hat{C}_i} a_in + \sum_{k\inF_i^s} a_ik g_ik */
1041  TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"ClassicalPFactory: Ext+i not implemented");
1042 
1043 }
1044 
1045 
1046 
1047 
1048 /* ************************************************************************* */
1049 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1051 GenerateStrengthFlags(const Matrix & A,const GraphBase & graph, Teuchos::Array<size_t> & eis_rowptr,Teuchos::Array<bool> & edgeIsStrong) const {
1052  // To make this easier, we'll create a bool array equal to the nnz in the matrix
1053  // so we know whether each edge is strong or not. This will save us a bunch of
1054  // trying to match the graph and matrix later
1055  size_t Nrows = A.getNodeNumRows();
1056  eis_rowptr.resize(Nrows+1);
1057 
1058  if(edgeIsStrong.size() == 0) {
1059  // Preferred
1060  edgeIsStrong.resize(A.getNodeNumEntries(),false);
1061  }
1062  else {
1063  edgeIsStrong.resize(A.getNodeNumEntries(),false);
1064  edgeIsStrong.assign(A.getNodeNumEntries(),false);
1065  }
1066 
1067  eis_rowptr[0] = 0;
1068  for (LO i=0; i<(LO)Nrows; i++) {
1069  LO rowstart = eis_rowptr[i];
1070  ArrayView<const LO> A_indices;
1071  ArrayView<const SC> A_values;
1072  A.getLocalRowView(i, A_indices, A_values);
1073  LO A_size = (LO) A_indices.size();
1074 
1075  ArrayView<const LO> G_indices = graph.getNeighborVertices(i);
1076  LO G_size = (LO) G_indices.size();
1077 
1078  // Both of these guys should be in the same (sorted) order, but let's check
1079  bool is_ok=true;
1080  for(LO j=0; j<A_size-1; j++)
1081  if (A_indices[j] >= A_indices[j+1]) { is_ok=false; break;}
1082  for(LO j=0; j<G_size-1; j++)
1083  if (G_indices[j] >= G_indices[j+1]) { is_ok=false; break;}
1084  TEUCHOS_TEST_FOR_EXCEPTION(!is_ok, Exceptions::RuntimeError,"ClassicalPFactory: Exected A and Graph to be sorted");
1085 
1086  // Now cycle through and set the flags - if the edge is in G it is strong
1087  for(LO g_idx=0, a_idx=0; g_idx < G_size; g_idx++) {
1088  LO col = G_indices[g_idx];
1089  while (A_indices[a_idx] != col && a_idx < A_size) a_idx++;
1090  if(a_idx == A_size) {is_ok=false;break;}
1091  edgeIsStrong[rowstart+a_idx] = true;
1092  }
1093 
1094  eis_rowptr[i+1] = eis_rowptr[i] + A_size;
1095  }
1096 }
1097 
1098 
1099 /* ************************************************************************* */
1100 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1102 GhostCoarseMap(const Matrix &A, const Import & Importer, const ArrayRCP<const LO> myPointType, const RCP<const Map> & coarseMap, RCP<const Map> & coarseColMap) const {
1104  const GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
1105  RCP<GlobalOrdinalVector> d_coarseIds = GlobalOrdinalVectorFactory::Build(A.getRowMap());
1106  ArrayRCP<GO> d_data = d_coarseIds->getDataNonConst(0);
1107  LO ct=0;
1108 
1109  for(LO i=0; i<(LO)d_data.size(); i++) {
1110  if(myPointType[i] == C_PT) {
1111  d_data[i] = coarseMap->getGlobalElement(ct);
1112  ct++;
1113  }
1114  else
1115  d_data[i] = GO_INVALID;
1116  }
1117 
1118  // Ghost this guy
1119  RCP<GlobalOrdinalVector> c_coarseIds = GlobalOrdinalVectorFactory::Build(A.getColMap());
1120  c_coarseIds->doImport(*d_coarseIds,Importer,Xpetra::INSERT);
1121 
1122  // If we assume that A is in Aztec ordering, then any subset of A's unknowns will
1123  // be in Aztec ordering as well, which means we can just condense these guys down
1124  // Overallocate, count and view
1125  ArrayRCP<GO> c_data = c_coarseIds->getDataNonConst(0);
1126 
1127  Array<GO> c_gids(c_data.size());
1128  LO count=0;
1129 
1130  for(LO i=0; i<(LO)c_data.size(); i++) {
1131  if(c_data[i] != GO_INVALID) {
1132  c_gids[count] = c_data[i];
1133  count++;
1134  }
1135  }
1136  // FIXME: Assumes scalar PDE
1137  std::vector<size_t> stridingInfo_(1);
1138  stridingInfo_[0]=1;
1139  GO domainGIDOffset = 0;
1140 
1141  coarseColMap = StridedMapFactory::Build(coarseMap->lib(),
1142  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
1143  c_gids.view(0,count),
1144  coarseMap->getIndexBase(),
1145  stridingInfo_,
1146  coarseMap->getComm(),
1147  domainGIDOffset);
1148 
1149 }
1150 
1151 
1152 } //namespace MueLu
1153 
1154 
1155 
1156 #define MUELU_CLASSICALPFACTORY_SHORT
1157 #endif // MUELU_CLASSICALPFACTORY_DEF_HPP
1158 
1159 
void GhostCoarseMap(const Matrix &A, const Import &Importer, const ArrayRCP< const LO > myPointType, const RCP< const Map > &coarseMap, RCP< const Map > &coarseColMap) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
virtual size_t GetNodeNumVertices() const =0
Return number of vertices owned by the calling node.
virtual size_t GetNodeNumEdges() const =0
Return number of edges owned by the calling node.
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
Print additional debugging information.
Namespace for MueLu classes and methods.
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
void Coarsen_Ext_Plus_I(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void Coarsen_Direct(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu representation of a graph.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
#define SET_VALID_ENTRY(name)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void GenerateStrengthFlags(const Matrix &A, const GraphBase &graph, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong) const
typename ClassicalMapFactory::point_type point_type
Exception throws to report errors in the internal logical of the program.
void Coarsen_ClassicalModified(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< const Import > remoteOnlyImporter, RCP< Matrix > &P) const
virtual Teuchos::ArrayView< const LocalOrdinal > getNeighborVertices(LocalOrdinal v) const =0
Return the list of vertices adjacent to the vertex &#39;v&#39;.