49 #ifndef Intrepid2_TensorBasis_h 50 #define Intrepid2_TensorBasis_h 52 #include <Kokkos_View.hpp> 53 #include <Kokkos_DynRankView.hpp> 55 #include <Teuchos_RCP.hpp> 57 #include <Intrepid2_config.h> 71 template<ordinal_type spaceDim>
72 KOKKOS_INLINE_FUNCTION
73 void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries,
const ordinal_type dkEnum,
const ordinal_type operatorOrder);
76 KOKKOS_INLINE_FUNCTION
77 void getDkEnumerationInverse<1>(Kokkos::Array<int,1> &entries,
const ordinal_type dkEnum,
const ordinal_type operatorOrder)
79 entries[0] = operatorOrder;
83 KOKKOS_INLINE_FUNCTION
84 void getDkEnumerationInverse<2>(Kokkos::Array<int,2> &entries,
const ordinal_type dkEnum,
const ordinal_type operatorOrder)
86 entries[0] = operatorOrder - dkEnum;
91 KOKKOS_INLINE_FUNCTION
92 void getDkEnumerationInverse<3>(Kokkos::Array<int,3> &entries,
const ordinal_type dkEnum,
const ordinal_type operatorOrder)
97 for (ordinal_type yMult=0; yMult<=operatorOrder; yMult++)
99 for (ordinal_type zMult=0; zMult<=operatorOrder-yMult; zMult++)
101 const ordinal_type xMult = operatorOrder-(zMult+yMult);
102 if (dkEnum == getDkEnumeration<3>(xMult,yMult,zMult))
112 template<ordinal_type spaceDim>
116 inline ordinal_type getDkEnumeration<1>(Kokkos::Array<int,1> &entries)
118 return getDkEnumeration<1>(entries[0]);
122 inline ordinal_type getDkEnumeration<2>(Kokkos::Array<int,2> &entries)
124 return getDkEnumeration<2>(entries[0],entries[1]);
128 inline ordinal_type getDkEnumeration<3>(Kokkos::Array<int,3> &entries)
130 return getDkEnumeration<3>(entries[0],entries[1],entries[2]);
133 template<ordinal_type spaceDim1, ordinal_type spaceDim2>
134 inline ordinal_type getDkTensorIndex(
const ordinal_type dkEnum1,
const ordinal_type operatorOrder1,
135 const ordinal_type dkEnum2,
const ordinal_type operatorOrder2)
137 Kokkos::Array<int,spaceDim1> entries1;
138 getDkEnumerationInverse<spaceDim1>(entries1, dkEnum1, operatorOrder1);
140 Kokkos::Array<int,spaceDim2> entries2;
141 getDkEnumerationInverse<spaceDim2>(entries2, dkEnum2, operatorOrder2);
143 const int spaceDim = spaceDim1 + spaceDim2;
144 Kokkos::Array<int,spaceDim> entries;
146 for (ordinal_type d=0; d<spaceDim1; d++)
148 entries[d] = entries1[d];
151 for (ordinal_type d=0; d<spaceDim2; d++)
153 entries[d+spaceDim1] = entries2[d];
156 return getDkEnumeration<spaceDim>(entries);
159 template<
typename BasisBase>
168 std::vector< std::vector<EOperator> > ops;
169 std::vector<double> weights;
170 ordinal_type numBasisComponents_;
172 OperatorTensorDecomposition(
const std::vector<EOperator> &opsBasis1,
const std::vector<EOperator> &opsBasis2,
const std::vector<double> vectorComponentWeights)
174 weights(vectorComponentWeights),
175 numBasisComponents_(2)
177 const ordinal_type size = opsBasis1.size();
178 const ordinal_type opsBasis2Size = opsBasis2.size();
179 const ordinal_type weightsSize = weights.size();
180 INTREPID2_TEST_FOR_EXCEPTION(size != opsBasis2Size, std::invalid_argument,
"opsBasis1.size() != opsBasis2.size()");
181 INTREPID2_TEST_FOR_EXCEPTION(size != weightsSize, std::invalid_argument,
"opsBasis1.size() != weights.size()");
183 for (ordinal_type i=0; i<size; i++)
185 ops.push_back(std::vector<EOperator>{opsBasis1[i],opsBasis2[i]});
189 OperatorTensorDecomposition(
const std::vector< std::vector<EOperator> > &vectorEntryOps,
const std::vector<double> &vectorComponentWeights)
192 weights(vectorComponentWeights)
194 const ordinal_type numVectorComponents = ops.size();
195 const ordinal_type weightsSize = weights.size();
196 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents != weightsSize, std::invalid_argument,
"opsBasis1.size() != weights.size()");
198 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents == 0, std::invalid_argument,
"must have at least one entry!");
200 ordinal_type numBases = 0;
201 for (ordinal_type i=0; i<numVectorComponents; i++)
205 numBases = ops[i].size();
207 else if (ops[i].size() != 0)
209 const ordinal_type opsiSize = ops[i].size();
210 INTREPID2_TEST_FOR_EXCEPTION(numBases != opsiSize, std::invalid_argument,
"must have one operator for each basis in each nontrivial entry in vectorEntryOps");
213 INTREPID2_TEST_FOR_EXCEPTION(numBases == 0, std::invalid_argument,
"at least one vectorEntryOps entry must be non-trivial");
214 numBasisComponents_ = numBases;
221 numBasisComponents_(basisOps.size())
226 ops({ std::vector<EOperator>{opBasis1, opBasis2} }),
228 numBasisComponents_(2)
233 ops({ std::vector<EOperator>{opBasis1, opBasis2, opBasis3} }),
235 numBasisComponents_(3)
238 ordinal_type numVectorComponents()
const 243 ordinal_type numBasisComponents()
const 245 return numBasisComponents_;
248 double weight(
const ordinal_type &vectorComponentOrdinal)
const 250 return weights[vectorComponentOrdinal];
253 bool identicallyZeroComponent(
const ordinal_type &vectorComponentOrdinal)
const 255 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument,
"vectorComponentOrdinal is out of bounds");
256 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument,
"vectorComponentOrdinal is out of bounds");
257 return ops[vectorComponentOrdinal].size() == 0;
260 EOperator op(
const ordinal_type &vectorComponentOrdinal,
const ordinal_type &basisOrdinal)
const 262 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument,
"vectorComponentOrdinal is out of bounds");
263 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument,
"vectorComponentOrdinal is out of bounds");
264 if (identicallyZeroComponent(vectorComponentOrdinal))
270 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal < 0, std::invalid_argument,
"basisOrdinal is out of bounds");
271 INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal >= numBasisComponents_, std::invalid_argument,
"basisOrdinal is out of bounds");
272 return ops[vectorComponentOrdinal][basisOrdinal];
277 template<
typename DeviceType,
typename OutputValueType,
class Po
intValueType>
280 const ordinal_type basesSize = bases.size();
281 INTREPID2_TEST_FOR_EXCEPTION(basesSize != numBasisComponents_, std::invalid_argument,
"The number of bases provided must match the number of basis components in this decomposition");
283 ordinal_type numExpandedBasisComponents = 0;
286 std::vector<TensorBasis*> basesAsTensorBasis(numBasisComponents_);
287 for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
289 TensorBasis* basisAsTensorBasis =
dynamic_cast<TensorBasis*
>(bases[basisComponentOrdinal].get());
290 basesAsTensorBasis[basisComponentOrdinal] = basisAsTensorBasis;
291 if (basisAsTensorBasis)
293 numExpandedBasisComponents += basisAsTensorBasis->getTensorBasisComponents().size();
297 numExpandedBasisComponents += 1;
301 std::vector< std::vector<EOperator> > expandedOps;
302 std::vector<double> expandedWeights;
303 const ordinal_type opsSize = ops.size();
304 for (ordinal_type simpleVectorEntryOrdinal=0; simpleVectorEntryOrdinal<opsSize; simpleVectorEntryOrdinal++)
306 if (identicallyZeroComponent(simpleVectorEntryOrdinal))
308 expandedOps.push_back(std::vector<EOperator>{});
309 expandedWeights.push_back(0.0);
313 std::vector< std::vector<EOperator> > expandedBasisOpsForSimpleVectorEntry(1);
316 auto addExpandedOp = [&expandedBasisOpsForSimpleVectorEntry](
const EOperator &op)
318 const ordinal_type size = expandedBasisOpsForSimpleVectorEntry.size();
319 for (ordinal_type i=0; i<size; i++)
321 expandedBasisOpsForSimpleVectorEntry[i].push_back(op);
327 auto vectorizeExpandedOps = [&expandedBasisOpsForSimpleVectorEntry](
const int &numSubVectors)
330 INTREPID2_TEST_FOR_EXCEPTION(expandedBasisOpsForSimpleVectorEntry.size() != 1, std::invalid_argument,
"multiple basis components may not be vector-valued!");
331 for (ordinal_type i=1; i<numSubVectors; i++)
333 expandedBasisOpsForSimpleVectorEntry.push_back(expandedBasisOpsForSimpleVectorEntry[0]);
337 std::vector<EOperator> subVectorOps;
338 std::vector<double> subVectorWeights {weights[simpleVectorEntryOrdinal]};
339 for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
341 const auto &op = ops[simpleVectorEntryOrdinal][basisComponentOrdinal];
343 if (! basesAsTensorBasis[basisComponentOrdinal])
350 if (basisOpDecomposition.numVectorComponents() > 1)
353 INTREPID2_TEST_FOR_EXCEPTION(subVectorWeights.size() > 1, std::invalid_argument,
"Unhandled case: multiple component bases are vector-valued");
355 ordinal_type numSubVectors = basisOpDecomposition.numVectorComponents();
356 vectorizeExpandedOps(numSubVectors);
358 double weightSoFar = subVectorWeights[0];
359 for (ordinal_type subVectorEntryOrdinal=1; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
361 subVectorWeights.push_back(weightSoFar * basisOpDecomposition.weight(subVectorEntryOrdinal));
363 subVectorWeights[0] *= basisOpDecomposition.weight(0);
364 for (ordinal_type subVectorEntryOrdinal=0; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
366 for (ordinal_type subComponentBasis=0; subComponentBasis<basisOpDecomposition.numBasisComponents(); subComponentBasis++)
368 const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, subComponentBasis);
369 expandedBasisOpsForSimpleVectorEntry[subVectorEntryOrdinal].push_back(basisOp);
375 double componentWeight = basisOpDecomposition.weight(0);
376 const ordinal_type size = subVectorWeights.size();
377 for (ordinal_type i=0; i<size; i++)
379 subVectorWeights[i] *= componentWeight;
381 ordinal_type subVectorEntryOrdinal = 0;
382 const ordinal_type numBasisComponents = basisOpDecomposition.numBasisComponents();
383 for (ordinal_type subComponentBasis=0; subComponentBasis<numBasisComponents; subComponentBasis++)
385 const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, basisComponentOrdinal);
386 addExpandedOp( basisOp );
393 for (ordinal_type i=0; i<static_cast<ordinal_type>(expandedBasisOpsForSimpleVectorEntry.size()); i++)
395 const ordinal_type size = expandedBasisOpsForSimpleVectorEntry[i].size();
396 INTREPID2_TEST_FOR_EXCEPTION(size != numExpandedBasisComponents, std::logic_error,
"each vector in expandedBasisOpsForSimpleVectorEntry should have as many entries as there are expanded basis components");
399 expandedOps.insert(expandedOps.end(), expandedBasisOpsForSimpleVectorEntry.begin(), expandedBasisOpsForSimpleVectorEntry.end());
400 expandedWeights.insert(expandedWeights.end(), subVectorWeights.begin(), subVectorWeights.end());
403 INTREPID2_TEST_FOR_EXCEPTION(expandedOps.size() != expandedWeights.size(), std::logic_error,
"expandedWeights and expandedOps do not agree on the number of vector components");
414 template<
class ExecutionSpace,
class OutputScalar,
class OutputFieldType>
417 using ScratchSpace =
typename ExecutionSpace::scratch_memory_space;
418 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
420 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
421 using TeamMember =
typename TeamPolicy::member_type;
424 using RankCombinationType =
typename TensorViewIteratorType::RankCombinationType;
426 OutputFieldType output_;
427 OutputFieldType input1_;
428 OutputFieldType input2_;
430 int numFields_, numPoints_;
431 int numFields1_, numPoints1_;
432 int numFields2_, numPoints2_;
436 using RankCombinationViewType =
typename TensorViewIteratorType::RankCombinationViewType;
437 RankCombinationViewType rank_combinations_;
443 TensorViewFunctor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2,
444 bool tensorPoints,
double weight)
445 : output_(output), input1_(inputValues1), input2_(inputValues2), tensorPoints_(tensorPoints), weight_(weight)
447 numFields_ = output.extent_int(0);
448 numPoints_ = output.extent_int(1);
450 numFields1_ = inputValues1.extent_int(0);
451 numPoints1_ = inputValues1.extent_int(1);
453 numFields2_ = inputValues2.extent_int(0);
454 numPoints2_ = inputValues2.extent_int(1);
459 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument,
"incompatible point counts");
460 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument,
"incompatible point counts");
464 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_, std::invalid_argument,
"incompatible point counts");
467 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_, std::invalid_argument,
"incompatible field sizes");
469 const ordinal_type max_rank = std::max(inputValues1.rank(),inputValues2.rank());
472 const ordinal_type outputRank = output.rank();
473 INTREPID2_TEST_FOR_EXCEPTION(outputRank > max_rank, std::invalid_argument,
"Unsupported view combination.");
474 rank_combinations_ = RankCombinationViewType(
"Rank_combinations_", max_rank);
475 auto rank_combinations_host = Kokkos::create_mirror_view(rank_combinations_);
477 rank_combinations_host[0] = TensorViewIteratorType::TENSOR_PRODUCT;
478 rank_combinations_host[1] = tensorPoints ? TensorViewIteratorType::TENSOR_PRODUCT : TensorViewIteratorType::DIMENSION_MATCH;
479 for (ordinal_type d=2; d<max_rank; d++)
483 if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == 1))
485 rank_combinations_host[d] = TensorViewIteratorType::TENSOR_CONTRACTION;
487 else if (((inputValues1.extent_int(d) == output.extent_int(d)) && (inputValues2.extent_int(d) == 1))
488 || ((inputValues2.extent_int(d) == output.extent_int(d)) && (inputValues1.extent_int(d) == 1))
493 rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
495 else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d) * inputValues2.extent_int(d)))
498 rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
500 else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d)))
504 rank_combinations_host[d] = TensorViewIteratorType::DIMENSION_MATCH;
508 std::cout <<
"inputValues1.extent_int(" << d <<
") = " << inputValues1.extent_int(d) << std::endl;
509 std::cout <<
"inputValues2.extent_int(" << d <<
") = " << inputValues2.extent_int(d) << std::endl;
510 std::cout <<
"output.extent_int(" << d <<
") = " << output.extent_int(d) << std::endl;
511 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"unable to find an interpretation for this combination of views");
514 Kokkos::deep_copy(rank_combinations_,rank_combinations_host);
517 KOKKOS_INLINE_FUNCTION
518 void operator()(
const TeamMember & teamMember )
const 520 auto fieldOrdinal1 = teamMember.league_rank();
522 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
524 const int FIELD_ORDINAL_DIMENSION = 0;
525 it.
setLocation({fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0});
526 int next_increment_rank = FIELD_ORDINAL_DIMENSION;
527 OutputScalar accumulator = 0;
534 if ((next_increment_rank < 0) || (rank_combinations_[next_increment_rank] != TensorViewIteratorType::TENSOR_CONTRACTION))
541 }
while (it.
increment() > FIELD_ORDINAL_DIMENSION);
559 template<
typename BasisBaseClass =
void>
562 public BasisBaseClass
565 using BasisBase = BasisBaseClass;
566 using BasisPtr = Teuchos::RCP<BasisBase>;
572 std::vector<BasisPtr> tensorComponents_;
576 using DeviceType =
typename BasisBase::DeviceType;
577 using ExecutionSpace =
typename BasisBase::ExecutionSpace;
578 using OutputValueType =
typename BasisBase::OutputValueType;
579 using PointValueType =
typename BasisBase::PointValueType;
581 using OrdinalTypeArray1DHost =
typename BasisBase::OrdinalTypeArray1DHost;
582 using OrdinalTypeArray2DHost =
typename BasisBase::OrdinalTypeArray2DHost;
583 using OutputViewType =
typename BasisBase::OutputViewType;
584 using PointViewType =
typename BasisBase::PointViewType;
591 Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace = FUNCTION_SPACE_MAX)
593 basis1_(basis1),basis2_(basis2)
595 this->functionSpace_ = functionSpace;
600 auto basis1Components = basis1AsTensor->getTensorBasisComponents();
601 tensorComponents_.insert(tensorComponents_.end(), basis1Components.begin(), basis1Components.end());
605 tensorComponents_.push_back(basis1_);
611 auto basis2Components = basis2AsTensor->getTensorBasisComponents();
612 tensorComponents_.insert(tensorComponents_.end(), basis2Components.begin(), basis2Components.end());
616 tensorComponents_.push_back(basis2_);
619 this->basisCardinality_ = basis1->getCardinality() * basis2->getCardinality();
620 this->basisDegree_ = std::max(basis1->getDegree(), basis2->getDegree());
623 std::ostringstream basisName;
624 basisName << basis1->getName() <<
" x " << basis2->getName();
625 name_ = basisName.str();
629 shards::CellTopology cellTopo1 = basis1->getBaseCellTopology();
630 shards::CellTopology cellTopo2 = basis2->getBaseCellTopology();
632 auto cellKey1 = basis1->getBaseCellTopology().getKey();
633 auto cellKey2 = basis2->getBaseCellTopology().getKey();
634 if ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key))
636 this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() );
638 else if (((cellKey1 == shards::Quadrilateral<4>::key) && (cellKey2 == shards::Line<2>::key))
639 || ((cellKey2 == shards::Quadrilateral<4>::key) && (cellKey1 == shards::Line<2>::key)))
641 this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Hexahedron<8> >() );
645 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Cell topology combination not yet supported");
648 this->basisType_ = basis1->getBasisType();
649 this->basisCoordinates_ = COORDINATES_CARTESIAN;
653 const auto & cardinality = this->basisCardinality_;
656 const ordinal_type tagSize = 4;
657 const ordinal_type posScDim = 0;
658 const ordinal_type posScOrd = 1;
659 const ordinal_type posDfOrd = 2;
661 OrdinalTypeArray1DHost tagView(
"tag view", cardinality*tagSize);
663 shards::CellTopology cellTopo = this->basisCellTopology_;
665 ordinal_type tensorSpaceDim = cellTopo.getDimension();
666 ordinal_type spaceDim1 = cellTopo1.getDimension();
667 ordinal_type spaceDim2 = cellTopo2.getDimension();
669 if (this->getBasisType() == BASIS_FEM_HIERARCHICAL)
671 int degreeSize = basis1_->getPolynomialDegreeLength() + basis2_->getPolynomialDegreeLength();
672 this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost(
"TensorBasis - field ordinal polynomial degree", this->basisCardinality_, degreeSize);
677 for (ordinal_type d=0; d<=tensorSpaceDim; d++)
679 ordinal_type d2_max = std::min(spaceDim2,d);
680 int subcellOffset = 0;
681 for (ordinal_type d2=0; d2<=d2_max; d2++)
683 ordinal_type d1 = d-d2;
684 if (d1 > spaceDim1)
continue;
686 ordinal_type subcellCount2 = cellTopo2.getSubcellCount(d2);
687 ordinal_type subcellCount1 = cellTopo1.getSubcellCount(d1);
688 for (ordinal_type subcellOrdinal2=0; subcellOrdinal2<subcellCount2; subcellOrdinal2++)
690 ordinal_type subcellDofCount2 = basis2_->getDofCount(d2, subcellOrdinal2);
691 for (ordinal_type subcellOrdinal1=0; subcellOrdinal1<subcellCount1; subcellOrdinal1++)
693 ordinal_type subcellDofCount1 = basis1_->getDofCount(d1, subcellOrdinal1);
694 ordinal_type tensorLocalDofCount = subcellDofCount1 * subcellDofCount2;
695 for (ordinal_type localDofID2 = 0; localDofID2<subcellDofCount2; localDofID2++)
697 ordinal_type fieldOrdinal2 = basis2_->getDofOrdinal(d2, subcellOrdinal2, localDofID2);
698 OrdinalTypeArray1DHost degreesField2;
699 if (this->basisType_ == BASIS_FEM_HIERARCHICAL) degreesField2 = basis2_->getPolynomialDegreeOfField(fieldOrdinal2);
700 for (ordinal_type localDofID1 = 0; localDofID1<subcellDofCount1; localDofID1++)
702 ordinal_type fieldOrdinal1 = basis1_->getDofOrdinal(d1, subcellOrdinal1, localDofID1);
703 ordinal_type tensorLocalDofID = localDofID2 * subcellDofCount1 + localDofID1;
704 ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_->getCardinality() + fieldOrdinal1;
705 tagView(tensorFieldOrdinal*tagSize+0) = d;
707 tagView(tensorFieldOrdinal*tagSize+2) = tensorLocalDofID;
708 tagView(tensorFieldOrdinal*tagSize+3) = tensorLocalDofCount;
710 if (this->basisType_ == BASIS_FEM_HIERARCHICAL)
713 OrdinalTypeArray1DHost degreesField1 = basis1_->getPolynomialDegreeOfField(fieldOrdinal1);
715 int degreeLengthField1 = degreesField1.extent_int(0);
716 int degreeLengthField2 = degreesField2.extent_int(0);
717 for (
int d3=0; d3<degreeLengthField1; d3++)
719 this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3) = degreesField1(d3);
721 for (
int d3=0; d3<degreeLengthField2; d3++)
723 this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3+degreeLengthField1) = degreesField2(d3);
730 subcellOffset += subcellCount1 * subcellCount2;
736 this->setOrdinalTagData(this->tagToOrdinal_,
739 this->basisCardinality_,
753 INTREPID2_TEST_FOR_EXCEPTION(
true, std::logic_error,
"subclasses must override either getSimpleOperatorDecomposition() or getOperatorDecomposition()");
762 std::vector<BasisPtr> componentBases {basis1_, basis2_};
772 const bool operatorSupported = (operatorType == OPERATOR_VALUE) || (operatorType == OPERATOR_GRAD) || (operatorType == OPERATOR_CURL) || (operatorType == OPERATOR_DIV);
773 INTREPID2_TEST_FOR_EXCEPTION(!operatorSupported, std::invalid_argument,
"operator is not supported by allocateBasisValues");
775 ordinal_type numBasisComponents = tensorComponents_.size();
778 const ordinal_type numVectorComponents = opDecomposition.numVectorComponents();
779 const bool useVectorData = numVectorComponents > 1;
783 const int numFamilies = 1;
786 const int familyOrdinal = 0;
787 for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
789 if (!opDecomposition.identicallyZeroComponent(vectorComponentOrdinal))
791 std::vector< Data<OutputValueType,DeviceType> > componentData;
792 for (ordinal_type r=0; r<numBasisComponents; r++)
795 const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
796 auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
808 std::vector< Data<OutputValueType,DeviceType> > componentData;
810 const ordinal_type vectorComponentOrdinal = 0;
811 for (ordinal_type r=0; r<numBasisComponents; r++)
814 const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
815 auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
820 const Kokkos::Array<int,7> extents {componentView.extent_int(0), componentView.extent_int(1), 1,1,1,1,1};
827 std::vector< TensorData<OutputValueType,DeviceType> > tensorDataEntries {tensorData};
835 using BasisBase::getValues;
849 PointViewType & inputPoints1, PointViewType & inputPoints2,
bool &tensorDecompositionSucceeded)
const 851 INTREPID2_TEST_FOR_EXCEPTION(attemptTensorDecomposition, std::invalid_argument,
"tensor decomposition not yet supported");
865 int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
866 int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
868 int totalSpaceDim = inputPoints.extent_int(1);
870 TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim);
874 inputPoints1 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(0,spaceDim1));
875 inputPoints2 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim));
881 tensorDecompositionSucceeded =
false;
892 virtual void getDofCoords(
typename BasisBase::ScalarViewType dofCoords )
const override 894 int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
895 int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
897 using ValueType =
typename BasisBase::ScalarViewType::value_type;
898 using ResultLayout =
typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
899 using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
901 const ordinal_type basisCardinality1 = basis1_->getCardinality();
902 const ordinal_type basisCardinality2 = basis2_->getCardinality();
904 ViewType dofCoords1(
"dofCoords1",basisCardinality1,spaceDim1);
905 ViewType dofCoords2(
"dofCoords2",basisCardinality2,spaceDim2);
907 basis1_->getDofCoords(dofCoords1);
908 basis2_->getDofCoords(dofCoords2);
910 Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
911 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (
const int fieldOrdinal2)
913 for (
int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
915 const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
916 for (
int d1=0; d1<spaceDim1; d1++)
918 dofCoords(fieldOrdinal,d1) = dofCoords1(fieldOrdinal1,d1);
920 for (
int d2=0; d2<spaceDim2; d2++)
922 dofCoords(fieldOrdinal,spaceDim1+d2) = dofCoords2(fieldOrdinal2,d2);
936 virtual void getDofCoeffs(
typename BasisBase::ScalarViewType dofCoeffs )
const override 938 using ValueType =
typename BasisBase::ScalarViewType::value_type;
939 using ResultLayout =
typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
940 using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
942 ViewType dofCoeffs1(
"dofCoeffs1",basis1_->getCardinality());
943 ViewType dofCoeffs2(
"dofCoeffs2",basis2_->getCardinality());
945 basis1_->getDofCoeffs(dofCoeffs1);
946 basis2_->getDofCoeffs(dofCoeffs2);
948 const ordinal_type basisCardinality1 = basis1_->getCardinality();
949 const ordinal_type basisCardinality2 = basis2_->getCardinality();
951 Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
952 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (
const int fieldOrdinal2)
954 for (
int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
956 const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
957 dofCoeffs(fieldOrdinal) = dofCoeffs1(fieldOrdinal1);
958 dofCoeffs(fieldOrdinal) = dofCoeffs2(fieldOrdinal2);
970 return name_.c_str();
982 ordinal_type dkEnum2, ordinal_type operatorOrder2)
const 984 ordinal_type spaceDim1 = basis1_->getBaseCellTopology().getDimension();
985 ordinal_type spaceDim2 = basis2_->getBaseCellTopology().getDimension();
995 return getDkTensorIndex<1, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
997 return getDkTensorIndex<1, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
999 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported dimension combination");
1005 return getDkTensorIndex<2, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1007 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported dimension combination");
1022 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported dimension combination");
1026 std::vector<BasisPtr> getTensorBasisComponents()
const 1028 return tensorComponents_;
1046 const EOperator operatorType = OPERATOR_VALUE )
const override 1050 const ordinal_type numVectorComponents = operatorDecomposition.numVectorComponents();
1051 const bool useVectorData = numVectorComponents > 1;
1052 const ordinal_type numBasisComponents = operatorDecomposition.numBasisComponents();
1054 for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
1056 const double weight = operatorDecomposition.weight(vectorComponentOrdinal);
1057 for (ordinal_type basisOrdinal=0; basisOrdinal<numBasisComponents; basisOrdinal++)
1059 const EOperator op = operatorDecomposition.op(vectorComponentOrdinal, basisOrdinal);
1061 if (op != OPERATOR_MAX)
1063 const int vectorFamily = 0;
1064 auto tensorData = useVectorData ? outputValues.
vectorData().getComponent(vectorFamily,vectorComponentOrdinal) : outputValues.
tensorData();
1065 INTREPID2_TEST_FOR_EXCEPTION( ! tensorData.getTensorComponent(basisOrdinal).isValid(), std::invalid_argument,
"Invalid output component encountered");
1073 tensorComponents_[basisOrdinal]->getValues(basisValueView, pointView, op);
1077 if ((weight != 1.0) && (basisOrdinal == 0))
1079 if (basisValueView.rank() == 2)
1081 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1)});
1082 Kokkos::parallel_for(
"multiply basisValueView by weight", policy,
1083 KOKKOS_LAMBDA (
const int &fieldOrdinal,
const int &pointOrdinal) {
1084 basisValueView(fieldOrdinal,pointOrdinal) *= weight;
1087 else if (basisValueView.rank() == 3)
1089 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1),basisValueView.extent_int(2)});
1090 Kokkos::parallel_for(
"multiply basisValueView by weight", policy,
1091 KOKKOS_LAMBDA (
const int &fieldOrdinal,
const int &pointOrdinal,
const int &d) {
1092 basisValueView(fieldOrdinal,pointOrdinal,d) *= weight;
1097 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported rank for basisValueView");
1123 void getValues( OutputViewType outputValues,
const PointViewType inputPoints,
1124 const EOperator operatorType = OPERATOR_VALUE )
const override 1127 bool attemptTensorDecomposition =
false;
1128 PointViewType inputPoints1, inputPoints2;
1129 getComponentPoints(inputPoints, attemptTensorDecomposition, inputPoints1, inputPoints2, tensorPoints);
1131 switch (operatorType)
1147 for (
int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
1149 int derivativeCountComp1=opOrder-derivativeCountComp2;
1150 EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE :
EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
1151 EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE :
EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));
1153 int spaceDim1 = inputPoints1.extent_int(1);
1154 int spaceDim2 = inputPoints2.extent_int(1);
1156 int dkCardinality1 = (op1 != OPERATOR_VALUE) ?
getDkCardinality(op1, spaceDim1) : 1;
1157 int dkCardinality2 = (op2 != OPERATOR_VALUE) ?
getDkCardinality(op2, spaceDim2) : 1;
1159 int basisCardinality1 = basis1_->getCardinality();
1160 int basisCardinality2 = basis2_->getCardinality();
1162 int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1164 int pointCount1, pointCount2;
1167 pointCount1 = inputPoints1.extent_int(0);
1168 pointCount2 = inputPoints2.extent_int(0);
1172 pointCount1 = totalPointCount;
1173 pointCount2 = totalPointCount;
1176 OutputViewType outputValues1, outputValues2;
1177 if (op1 == OPERATOR_VALUE)
1180 outputValues1 =
getMatchingViewWithLabel(outputValues,
"output values - basis 1",basisCardinality1,pointCount1,dkCardinality1);
1182 if (op2 == OPERATOR_VALUE)
1185 outputValues2 =
getMatchingViewWithLabel(outputValues,
"output values - basis 2",basisCardinality2,pointCount2,dkCardinality2);
1187 basis1_->getValues(outputValues1,inputPoints1,op1);
1188 basis2_->getValues(outputValues2,inputPoints2,op2);
1190 const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1191 const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1192 const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1194 auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1196 double weight = 1.0;
1199 for (
int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
1201 auto outputValues1_dkEnum1 = (op1 != OPERATOR_VALUE) ? Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL(),dkEnum1)
1202 : Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL());
1203 for (
int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
1205 auto outputValues2_dkEnum2 = (op2 != OPERATOR_VALUE) ? Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL(),dkEnum2)
1206 : Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL());
1208 ordinal_type dkTensorIndex =
getTensorDkEnumeration(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
1209 auto outputValues_dkTensor = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),dkTensorIndex);
1215 FunctorType functor(outputValues_dkTensor, outputValues1_dkEnum1, outputValues2_dkEnum2, tensorPoints, weight);
1216 Kokkos::parallel_for( policy , functor,
"TensorViewFunctor");
1223 this->
getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
1253 const PointViewType inputPoints1,
const PointViewType inputPoints2,
1254 bool tensorPoints)
const 1256 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"one-operator, two-inputPoints getValues should be overridden by TensorBasis subclasses");
1283 const PointViewType inputPoints1,
const EOperator operatorType1,
1284 const PointViewType inputPoints2,
const EOperator operatorType2,
1285 bool tensorPoints,
double weight=1.0)
const 1287 int basisCardinality1 = basis1_->getCardinality();
1288 int basisCardinality2 = basis2_->getCardinality();
1290 int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1292 int pointCount1, pointCount2;
1295 pointCount1 = inputPoints1.extent_int(0);
1296 pointCount2 = inputPoints2.extent_int(0);
1300 pointCount1 = totalPointCount;
1301 pointCount2 = totalPointCount;
1304 int spaceDim1 = inputPoints1.extent_int(1);
1305 int spaceDim2 = inputPoints2.extent_int(1);
1307 INTREPID2_TEST_FOR_EXCEPTION(!tensorPoints && (totalPointCount != inputPoints2.extent_int(0)),
1308 std::invalid_argument,
"If tensorPoints is false, the point counts must match!");
1310 int opRank1 =
getOperatorRank(basis1_->getFunctionSpace(), operatorType1, spaceDim1);
1311 int opRank2 =
getOperatorRank(basis2_->getFunctionSpace(), operatorType2, spaceDim2);
1313 OutputViewType outputValues1, outputValues2;
1318 else if (opRank1 == 1)
1320 outputValues1 =
getMatchingViewWithLabel(outputValues,
"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1324 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported opRank1");
1331 else if (opRank2 == 1)
1333 outputValues2 =
getMatchingViewWithLabel(outputValues,
"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1337 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported opRank2");
1340 basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1341 basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1343 const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1344 const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1345 const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1347 auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1351 FunctorType functor(outputValues, outputValues1, outputValues2, tensorPoints, weight);
1352 Kokkos::parallel_for( policy , functor,
"TensorViewFunctor");
1361 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"TensorBasis subclasses must override getHostBasis");
1372 template<
class ExecutionSpace,
class OutputScalar,
class OutputFieldType>
1375 using ScratchSpace =
typename ExecutionSpace::scratch_memory_space;
1376 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1378 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
1379 using TeamMember =
typename TeamPolicy::member_type;
1381 OutputFieldType output_;
1382 OutputFieldType input1_;
1383 OutputFieldType input2_;
1384 OutputFieldType input3_;
1386 int numFields_, numPoints_;
1387 int numFields1_, numPoints1_;
1388 int numFields2_, numPoints2_;
1389 int numFields3_, numPoints3_;
1395 TensorBasis3_Functor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2, OutputFieldType inputValues3,
1396 bool tensorPoints,
double weight)
1397 : output_(output), input1_(inputValues1), input2_(inputValues2), input3_(inputValues3), tensorPoints_(tensorPoints), weight_(weight)
1399 numFields_ = output.extent_int(0);
1400 numPoints_ = output.extent_int(1);
1402 numFields1_ = inputValues1.extent_int(0);
1403 numPoints1_ = inputValues1.extent_int(1);
1405 numFields2_ = inputValues2.extent_int(0);
1406 numPoints2_ = inputValues2.extent_int(1);
1408 numFields3_ = inputValues3.extent_int(0);
1409 numPoints3_ = inputValues3.extent_int(1);
1416 INTREPID2_TEST_FOR_EXCEPTION(inputValues1.rank() > 3, std::invalid_argument,
"ranks greater than 3 not yet supported");
1417 INTREPID2_TEST_FOR_EXCEPTION(inputValues2.rank() > 3, std::invalid_argument,
"ranks greater than 3 not yet supported");
1418 INTREPID2_TEST_FOR_EXCEPTION(inputValues3.rank() > 3, std::invalid_argument,
"ranks greater than 3 not yet supported");
1419 INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues2.rank() == 3), std::invalid_argument,
"two vector-valued input ranks not yet supported");
1420 INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument,
"two vector-valued input ranks not yet supported");
1421 INTREPID2_TEST_FOR_EXCEPTION((inputValues2.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument,
"two vector-valued input ranks not yet supported");
1426 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument,
"incompatible point counts");
1427 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument,
"incompatible point counts");
1428 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints3_, std::invalid_argument,
"incompatible point counts");
1432 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_ * numPoints3_, std::invalid_argument,
"incompatible point counts");
1435 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_ * numFields3_, std::invalid_argument,
"incompatible field sizes");
1438 KOKKOS_INLINE_FUNCTION
1439 void operator()(
const TeamMember & teamMember )
const 1441 auto fieldOrdinal1 = teamMember.league_rank();
1445 if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2))
1447 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1448 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1450 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1451 for (
int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1453 output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1458 else if (input1_.rank() == 3)
1460 int spaceDim = input1_.extent_int(2);
1461 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1462 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1464 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1465 for (
int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1467 for (
int d=0; d<spaceDim; d++)
1469 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal,d) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1475 else if (input2_.rank() == 3)
1477 int spaceDim = input2_.extent_int(2);
1478 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1479 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1481 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1482 for (
int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1484 for (
int d=0; d<spaceDim; d++)
1486 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal,d) * input3_(fieldOrdinal3,pointOrdinal);
1492 else if (input3_.rank() == 3)
1494 int spaceDim = input3_.extent_int(2);
1495 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1496 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1498 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1499 for (
int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1501 for (
int d=0; d<spaceDim; d++)
1503 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal,d);
1516 if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2) )
1518 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1519 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1521 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1522 for (
int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1524 for (
int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1526 for (
int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1528 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1529 output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1536 else if (input1_.rank() == 3)
1538 int spaceDim = input1_.extent_int(2);
1539 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1540 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1542 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1543 for (
int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1545 for (
int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1547 for (
int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1549 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1550 for (
int d=0; d<spaceDim; d++)
1552 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1,d) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1560 else if (input2_.rank() == 3)
1562 int spaceDim = input2_.extent_int(2);
1563 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1564 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1566 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1567 for (
int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1569 for (
int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1571 for (
int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1573 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1574 for (
int d=0; d<spaceDim; d++)
1576 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2,d) * input3_(fieldOrdinal3,pointOrdinal3);
1584 else if (input3_.rank() == 3)
1586 int spaceDim = input3_.extent_int(2);
1587 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (
const int& fieldOrdinal2) {
1588 for (
int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1590 int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1591 for (
int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1593 for (
int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1595 for (
int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1597 int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1598 for (
int d=0; d<spaceDim; d++)
1600 output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3,d);
1617 template<
typename BasisBaseClass =
void>
1621 using BasisBase = BasisBaseClass;
1624 using OutputViewType =
typename BasisBase::OutputViewType;
1625 using PointViewType =
typename BasisBase::PointViewType;
1626 using ScalarViewType =
typename BasisBase::ScalarViewType;
1628 using OutputValueType =
typename BasisBase::OutputValueType;
1629 using PointValueType =
typename BasisBase::PointValueType;
1631 using BasisPtr = Teuchos::RCP<BasisBase>;
1654 std::vector<BasisPtr> componentBases {basis1_, basis2_, basis3_};
1685 const PointViewType inputPoints12,
const PointViewType inputPoints3,
1686 bool tensorPoints)
const override 1690 int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
1691 int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
1693 int totalSpaceDim12 = inputPoints12.extent_int(1);
1695 TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim12);
1699 auto inputPoints1 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(0,spaceDim1));
1700 auto inputPoints2 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim12));
1702 this->
getValues(outputValues, operatorType, inputPoints1, inputPoints2, inputPoints3, tensorPoints);
1709 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"This method does not yet handle tensorPoints=true");
1741 const PointViewType inputPoints1,
const PointViewType inputPoints2,
const PointViewType inputPoints3,
1742 bool tensorPoints)
const = 0;
1772 const PointViewType inputPoints1,
const EOperator operatorType1,
1773 const PointViewType inputPoints2,
const EOperator operatorType2,
1774 const PointViewType inputPoints3,
const EOperator operatorType3,
1775 bool tensorPoints,
double weight=1.0)
const 1777 int basisCardinality1 = basis1_->getCardinality();
1778 int basisCardinality2 = basis2_->getCardinality();
1779 int basisCardinality3 = basis3_->getCardinality();
1781 int spaceDim1 = inputPoints1.extent_int(1);
1782 int spaceDim2 = inputPoints2.extent_int(1);
1783 int spaceDim3 = inputPoints3.extent_int(1);
1785 int totalPointCount;
1786 int pointCount1, pointCount2, pointCount3;
1789 pointCount1 = inputPoints1.extent_int(0);
1790 pointCount2 = inputPoints2.extent_int(0);
1791 pointCount3 = inputPoints3.extent_int(0);
1792 totalPointCount = pointCount1 * pointCount2 * pointCount3;
1796 totalPointCount = inputPoints1.extent_int(0);
1797 pointCount1 = totalPointCount;
1798 pointCount2 = totalPointCount;
1799 pointCount3 = totalPointCount;
1801 INTREPID2_TEST_FOR_EXCEPTION((totalPointCount != inputPoints2.extent_int(0)) || (totalPointCount != inputPoints3.extent_int(0)),
1802 std::invalid_argument,
"If tensorPoints is false, the point counts must match!");
1821 OutputViewType outputValues1, outputValues2, outputValues3;
1825 if ((spaceDim1 == 1) && (operatorType1 == OPERATOR_VALUE))
1832 outputValues1 =
getMatchingViewWithLabel(outputValues,
"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1834 if ((spaceDim2 == 1) && (operatorType2 == OPERATOR_VALUE))
1841 outputValues2 =
getMatchingViewWithLabel(outputValues,
"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1843 if ((spaceDim3 == 1) && (operatorType3 == OPERATOR_VALUE))
1850 outputValues3 =
getMatchingViewWithLabel(outputValues,
"output values - basis 3",basisCardinality3,pointCount3,spaceDim3);
1853 basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1854 basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1855 basis3_->getValues(outputValues3,inputPoints3,operatorType3);
1857 const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1858 const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1859 const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1861 using ExecutionSpace =
typename BasisBase::ExecutionSpace;
1863 auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1866 FunctorType functor(outputValues, outputValues1, outputValues2, outputValues3, tensorPoints, weight);
1867 Kokkos::parallel_for( policy , functor,
"TensorBasis3_Functor");
1876 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"TensorBasis3 subclasses must override getHostBasis");
KOKKOS_INLINE_FUNCTION ScalarType getView1Entry()
Teuchos::RCP< Basis< DeviceType, OutputType, PointType > > BasisPtr
Basis Pointer.
ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1, ordinal_type dkEnum2, ordinal_type operatorOrder2) const
Given "Dk" enumeration indices for the component bases, returns a Dk enumeration index for the compos...
Functor for computing values for the TensorBasis class.
KOKKOS_INLINE_FUNCTION ordinal_type getOperatorRank(const EFunctionSpace spaceType, const EOperator operatorType, const ordinal_type spaceDim)
Returns rank of an operator.
View-like interface to tensor points; point components are stored separately; the appropriate coordin...
virtual void getDofCoords(typename BasisBase::ScalarViewType dofCoords) const override
Fills in spatial locations (coordinates) of degrees of freedom (nodes) on the reference cell...
virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const
Returns a full decomposition of the specified operator. (Full meaning that all TensorBasis components...
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition, PointViewType &inputPoints1, PointViewType &inputPoints2, bool &tensorDecompositionSucceeded) const
Method to extract component points from composite points.
Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace=FUNCTION_SPACE_MAX)
Constructor.
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1...
For a multi-component tensor basis, specifies the operators to be applied to the components to produc...
virtual void getValues(OutputViewType outputValues, const EOperator operatorType, const PointViewType inputPoints12, const PointViewType inputPoints3, bool tensorPoints) const override
Evaluation of a tensor FEM basis on a reference cell.
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
For two cell topologies whose tensor product is a third, this class establishes a mapping from subcel...
Header function for Intrepid2::Util class and other utility functions.
Implementation of an assert that can safely be called from device code.
BasisPtr< typename Kokkos::HostSpace::device_type, OutputType, PointType > HostBasisPtr
Pointer to a Basis whose device type is on the host (Kokkos::HostSpace::device_type), allowing host access to input and output views, and ensuring host execution of basis evaluation.
virtual void getDofCoeffs(typename BasisBase::ScalarViewType dofCoeffs) const override
Fills in coefficients of degrees of freedom on the reference cell.
unsigned getCompositeSubcellOrdinal(unsigned subcell1Dim, unsigned subcell1Ordinal, unsigned subcell2Dim, unsigned subcell2Ordinal)
Map from component subcell ordinals to the corresponding composite subcell ordinal.
KOKKOS_INLINE_FUNCTION ordinal_type getDkCardinality(const EOperator operatorType, const ordinal_type spaceDim)
Returns multiplicities of dx, dy, and dz based on the enumeration of the partial derivative, its order and the space dimension. Inverse of the getDkEnumeration() method.
The data containers in Intrepid2 that support sum factorization and other reduced-data optimizations ...
KOKKOS_INLINE_FUNCTION ordinal_type getDkEnumeration(const ordinal_type xMult, const ordinal_type yMult=-1, const ordinal_type zMult=-1)
Returns the ordinal of a partial derivative of order k based on the multiplicities of the partials dx...
ordinal_type componentPointCount(const ordinal_type &tensorComponentOrdinal) const
Returns the number of points in the indicated component.
EOperator
Enumeration of primitive operators available in Intrepid. Primitive operators act on reconstructed fu...
KOKKOS_INLINE_FUNCTION void set(ScalarType value)
KOKKOS_INLINE_FUNCTION int increment()
KOKKOS_INLINE_FUNCTION void setLocation(const Kokkos::Array< int, 7 > location)
KOKKOS_INLINE_FUNCTION ScalarType getView2Entry()
virtual HostBasisPtr< OutputValueType, PointValueType > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
Kokkos::DynRankView< typename ViewType::value_type, typename DeduceLayout< ViewType >::result_layout, typename ViewType::device_type > getMatchingViewWithLabel(const ViewType &view, const std::string &label, DimArgs... dims)
Creates and returns a view that matches the provided view in Kokkos Layout.
virtual const char * getName() const override
Returns basis name.
virtual OperatorTensorDecomposition getSimpleOperatorDecomposition(const EOperator operatorType) const
Returns a simple decomposition of the specified operator: what operator(s) should be applied to basis...
TensorDataType & tensorData()
TensorData accessor for single-family scalar data.
KOKKOS_INLINE_FUNCTION ScalarView< PointScalar, DeviceType > getTensorComponent(const ordinal_type &r) const
Returns the requested tensor component.
Implementation of support for traversing component views alongside a view that represents a combinati...
KOKKOS_INLINE_FUNCTION ordinal_type getOperatorOrder(const EOperator operatorType)
Returns order of an operator.
KOKKOS_INLINE_FUNCTION int nextIncrementRank()
OperatorTensorDecomposition expandedDecomposition(std::vector< Teuchos::RCP< Basis< DeviceType, OutputValueType, PointValueType > > > &bases)
takes as argument bases that are components in this decomposition, and decomposes them further if the...
Class that defines mappings from component cell topologies to their tensor product topologies...
virtual void getValues(BasisValues< OutputValueType, DeviceType > outputValues, const TensorPoints< PointValueType, DeviceType > inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell, using point and output value containers that allow pre...
View-like interface to tensor data; tensor components are stored separately and multiplied together a...
const VectorDataType & vectorData() const
VectorData accessor.
virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const override
Returns a full decomposition of the specified operator. (Full meaning that all TensorBasis components...
void getValues(OutputViewType outputValues, const PointViewType inputPoints1, const EOperator operatorType1, const PointViewType inputPoints2, const EOperator operatorType2, const PointViewType inputPoints3, const EOperator operatorType3, bool tensorPoints, double weight=1.0) const
Evaluation of a tensor FEM basis on a reference cell; subclasses should override this.
A helper class that allows iteration over three Kokkos Views simultaneously, according to tensor comb...
virtual BasisValues< OutputValueType, DeviceType > allocateBasisValues(TensorPoints< PointValueType, DeviceType > points, const EOperator operatorType=OPERATOR_VALUE) const override
Allocate BasisValues container suitable for passing to the getValues() variant that takes a TensorPoi...
Basis defined as the tensor product of two component bases.
virtual HostBasisPtr< OutputValueType, PointValueType > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
Reference-space field values for a basis, designed to support typical vector-valued bases...
Functor for computing values for the TensorBasis3 class.
Header file for the abstract base class Intrepid2::Basis.