GCC Code Coverage Report


Directory: Bembel/src/
File: Bembel/src/H2Matrix/EigenHelper/H2DenseProduct.hpp
Date: 2024-09-30 07:01:38
Exec Total Coverage
Lines: 103 105 98.1%
Functions: 61 61 100.0%
Branches: 71 130 54.6%

Line Branch Exec Source
1 // This file is part of Bembel, the higher order C++ boundary element library.
2 //
3 // Copyright (C) 2022 see <http://www.bembel.eu>
4 //
5 // It was written as part of a cooperation of J. Doelz, H. Harbrecht, S. Kurz,
6 // M. Multerer, S. Schoeps, and F. Wolf at Technische Universitaet Darmstadt,
7 // Universitaet Basel, and Universita della Svizzera italiana, Lugano. This
8 // source code is subject to the GNU General Public License version 3 and
9 // provided WITHOUT ANY WARRANTY, see <http://www.bembel.eu> for further
10 // information.
11 #ifndef BEMBEL_SRC_H2MATRIX_EIGENHELPER_H2DENSEPRODUCT_HPP_
12 #define BEMBEL_SRC_H2MATRIX_EIGENHELPER_H2DENSEPRODUCT_HPP_
13
14 // The contents of this file are a modification of the file
15 // SparseDenseProduct.h from the Eigen library
16 namespace Eigen {
17
18 namespace internal {
19
20 template <typename H2LhsType, typename DenseRhsType, typename DenseResType,
21 typename AlphaType,
22 int LhsStorageOrder =
23 ((H2LhsType::Flags & RowMajorBit) == RowMajorBit) ? RowMajor
24 : ColMajor,
25 bool ColPerCol = ((DenseRhsType::Flags & RowMajorBit) == 0) ||
26 DenseRhsType::ColsAtCompileTime == 1>
27 struct H2_time_dense_product_impl;
28
29 /**
30 * \brief H2-matrix-vector multiplication, works also for matrices by iterating
31 * over the columns.
32 */
33 template <typename ScalarH2, typename DenseRhsType, typename DenseResType,
34 typename AlphaType>
35 struct H2_time_dense_product_impl<H2Matrix<ScalarH2>, DenseRhsType,
36 DenseResType, AlphaType, ColMajor, true> {
37 typedef typename traits<DenseRhsType>::Scalar ScalarRhs;
38 typedef typename traits<DenseResType>::Scalar ScalarRes;
39 303 static void run(const H2Matrix<ScalarH2>& lhs, const DenseRhsType& rhs,
40 DenseResType& res, const AlphaType& alpha) {
41 // get H2-data
42 303 int max_level =
43 303 lhs.get_block_cluster_tree()(0, 0).get_parameters().max_level_;
44 303 int min_cluster_level =
45 303 lhs.get_block_cluster_tree()(0, 0).get_parameters().min_cluster_level_;
46
1/2
✓ Branch 1 taken 303 times.
✗ Branch 2 not taken.
303 auto moment_matrix = lhs.get_fmm_moment_matrix();
47
1/2
✓ Branch 1 taken 303 times.
✗ Branch 2 not taken.
303 auto transfer_matrices = lhs.get_fmm_transfer_matrices();
48 303 int vector_dimension = moment_matrix.size();
49
50
2/2
✓ Branch 2 taken 303 times.
✓ Branch 3 taken 303 times.
606 for (Index c = 0; c < rhs.cols(); ++c) {
51 // go discontinuous in rhs
52
1/2
✓ Branch 1 taken 303 times.
✗ Branch 2 not taken.
303 Matrix<ScalarH2, Dynamic, 1> long_rhs_all =
53
3/6
✓ Branch 1 taken 303 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 303 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 303 times.
✗ Branch 8 not taken.
606 (lhs.get_transformation_matrix() * rhs.col(c)).eval();
54 303 int vector_component_size = long_rhs_all.rows() / vector_dimension;
55
56 // initialize destination
57
1/2
✓ Branch 2 taken 303 times.
✗ Branch 3 not taken.
303 Matrix<ScalarRes, Dynamic, 1> long_dst_all(long_rhs_all.rows());
58
1/2
✓ Branch 1 taken 303 times.
✗ Branch 2 not taken.
303 long_dst_all.setZero();
59
60
2/2
✓ Branch 0 taken 460 times.
✓ Branch 1 taken 303 times.
763 for (int col_component = 0; col_component < vector_dimension;
61 ++col_component) {
62
2/2
✓ Branch 3 taken 774 times.
✓ Branch 4 taken 460 times.
1234 for (int row_component = 0; row_component < vector_dimension;
63 ++row_component) {
64
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 Matrix<ScalarRhs, Dynamic, 1> long_rhs = long_rhs_all.segment(
65
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 col_component * vector_component_size, vector_component_size);
66
1/2
✓ Branch 2 taken 774 times.
✗ Branch 3 not taken.
774 Matrix<ScalarRhs, Dynamic, 1> long_dst(long_rhs.rows());
67
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 long_dst.setZero();
68
69 // split long rhs into pieces by reshaping
70
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 Matrix<ScalarRhs, Dynamic, Dynamic> long_rhs_matrix =
71
4/7
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 698 times.
✓ Branch 6 taken 76 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 698 times.
✗ Branch 9 not taken.
1548 Map<Matrix<ScalarRhs, Dynamic, Dynamic>>(
72 774 long_rhs.data(), moment_matrix[col_component].cols(),
73 774 long_rhs.rows() / moment_matrix[col_component].cols());
74
75 // do forward-transformation
76
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 std::vector<Matrix<ScalarRhs, Dynamic, Dynamic>> long_rhs_forward =
77 Bembel::H2Multipole::forwardTransformation(
78 774 moment_matrix[col_component], transfer_matrices,
79 max_level - min_cluster_level, long_rhs_matrix);
80
81 #pragma omp parallel
82 {
83 // initialize target for each process
84
1/2
✓ Branch 2 taken 774 times.
✗ Branch 3 not taken.
774 Matrix<ScalarRes, Dynamic, 1> my_long_dst(long_dst.rows());
85
86 // initialize target of backward-transformation
87 std::vector<Matrix<ScalarRes, Dynamic, Dynamic>>
88 774 my_long_dst_backward;
89
2/2
✓ Branch 1 taken 1548 times.
✓ Branch 2 taken 774 times.
2322 for (int i = 0; i < long_rhs_forward.size(); ++i)
90
3/6
✓ Branch 3 taken 1548 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 1548 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 1548 times.
✗ Branch 10 not taken.
3096 my_long_dst_backward.push_back(
91 Matrix<ScalarRes, Dynamic, Dynamic>::Zero(
92 3096 long_rhs_forward[i].rows(), long_rhs_forward[i].cols()));
93
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 my_long_dst.setZero();
94
95 // matrix-vector
96 774 for (auto leaf =
97 774 lhs.get_block_cluster_tree()(row_component, col_component)
98 774 .clbegin();
99 446598 leaf !=
100 446598 lhs.get_block_cluster_tree()(row_component, col_component)
101
2/2
✓ Branch 1 taken 445824 times.
✓ Branch 2 taken 774 times.
893196 .clend();
102 445824 ++leaf) {
103 #pragma omp single nowait
104 {
105
2/3
✓ Branch 2 taken 295776 times.
✓ Branch 3 taken 150048 times.
✗ Branch 4 not taken.
445824 switch ((*leaf)->get_cc()) {
106 // deal with matrix blocks
107 295776 case Bembel::BlockClusterAdmissibility::Dense: {
108
2/4
✓ Branch 1 taken 295776 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 295776 times.
✗ Branch 5 not taken.
295776 my_long_dst.segment((*leaf)->get_row_start_index(),
109
1/2
✓ Branch 2 taken 295776 times.
✗ Branch 3 not taken.
295776 (*leaf)->get_row_end_index() -
110
2/4
✓ Branch 2 taken 295776 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 295776 times.
✗ Branch 7 not taken.
591552 (*leaf)->get_row_start_index()) +=
111
1/2
✓ Branch 4 taken 295776 times.
✗ Branch 5 not taken.
295776 (*leaf)->get_leaf().get_F() *
112
2/4
✓ Branch 1 taken 295776 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 295776 times.
✗ Branch 5 not taken.
295776 long_rhs.segment((*leaf)->get_col_start_index(),
113
1/2
✓ Branch 2 taken 295776 times.
✗ Branch 3 not taken.
295776 (*leaf)->get_col_end_index() -
114
1/2
✓ Branch 2 taken 295776 times.
✗ Branch 3 not taken.
295776 (*leaf)->get_col_start_index());
115 295776 } break;
116 // deal with low-rank blocks
117 150048 case Bembel::BlockClusterAdmissibility::LowRank: {
118 const Bembel::ElementTreeNode* cluster1 =
119 150048 (*leaf)->get_cluster1();
120 const Bembel::ElementTreeNode* cluster2 =
121 150048 (*leaf)->get_cluster2();
122 150048 int cluster_level = cluster1->get_level();
123 150048 int fmm_level = lhs.get_block_cluster_tree()(0, 0)
124 150048 .get_parameters()
125 300096 .max_level_ -
126 150048 lhs.get_block_cluster_tree()(0, 0)
127 150048 .get_parameters()
128 150048 .min_cluster_level_ -
129 150048 (*leaf)->get_cluster1()->get_level();
130 150048 int cluster1_col = cluster1->id_;
131 150048 int cluster2_col = cluster2->id_;
132
2/4
✓ Branch 2 taken 150048 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 150048 times.
✗ Branch 6 not taken.
150048 my_long_dst_backward[fmm_level].col(cluster1_col) +=
133
1/2
✓ Branch 4 taken 150048 times.
✗ Branch 5 not taken.
150048 (*leaf)->get_leaf().get_F() *
134
1/2
✓ Branch 2 taken 150048 times.
✗ Branch 3 not taken.
150048 long_rhs_forward[fmm_level].col(cluster2_col);
135 150048 } break;
136 // this leaf is not a low-rank block and not a dense block,
137 // thus it is not a leaf -> error
138 default:
139 assert(0 && "This should never happen");
140 break;
141 }
142 }
143 }
144
145 // do backward transformation
146
2/4
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 774 times.
✗ Branch 5 not taken.
774 my_long_dst += Bembel::H2Multipole::backwardTransformation(
147 774 moment_matrix[row_component], transfer_matrices,
148 max_level - min_cluster_level, my_long_dst_backward);
149
150 #pragma omp critical
151
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 long_dst += my_long_dst;
152 774 }
153
154 // finish off vector component
155
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
1548 long_dst_all.segment(row_component * vector_component_size,
156
1/2
✓ Branch 1 taken 774 times.
✗ Branch 2 not taken.
774 vector_component_size) += long_dst;
157 }
158 }
159
160 // go continuous and write output
161
4/8
✓ Branch 1 taken 303 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 303 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 303 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 303 times.
✗ Branch 11 not taken.
303 res.col(c) +=
162
2/4
✓ Branch 1 taken 303 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 303 times.
✗ Branch 5 not taken.
606 alpha * lhs.get_transformation_matrix().transpose() * long_dst_all;
163 }
164 303 }
165 };
166
167 template <typename H2LhsType, typename DenseRhsType, typename DenseResType,
168 typename AlphaType>
169 303 inline void H2_time_dense_product(const H2LhsType& lhs, const DenseRhsType& rhs,
170 DenseResType& res, const AlphaType& alpha) {
171 H2_time_dense_product_impl<H2LhsType, DenseRhsType, DenseResType,
172 303 AlphaType>::run(lhs, rhs, res, alpha);
173 303 }
174
175 /**
176 * \brief overwrite eigen implementation by using distributive law, i.e.,
177 *compute A*c+B*c instead of (A+B)*c
178 */
179 template <typename BinaryOp, typename BinaryLhs, typename BinaryRhs,
180 typename Rhs, int ProductType>
181 struct generic_product_impl<CwiseBinaryOp<BinaryOp, BinaryLhs, BinaryRhs>, Rhs,
182 H2, DenseShape, ProductType>
183 : generic_product_impl_base<
184 CwiseBinaryOp<BinaryOp, BinaryLhs, BinaryRhs>, Rhs,
185 generic_product_impl<CwiseBinaryOp<BinaryOp, BinaryLhs, BinaryRhs>,
186 Rhs, H2, DenseShape, ProductType>> {
187 typedef CwiseBinaryOp<BinaryOp, BinaryLhs, BinaryRhs> Lhs;
188 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
189
190 // we only need to specify scaleAndAddTo, everything else is build on this in
191 // the background
192 template <typename Dest>
193 134 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
194 const Scalar& alpha) {
195 typedef Product<BinaryLhs, Rhs, ProductType> LeftProduct;
196 typedef Product<BinaryRhs, Rhs, ProductType> RightProduct;
197 typedef CwiseBinaryOp<BinaryOp, LeftProduct, RightProduct> NewCwiseBinaryOp;
198 typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
199 typedef CwiseNullaryOp<scalar_constant_op<Scalar>, DenseMatrix>
200 ScalarCwiseNullaryOp;
201 typedef scalar_constant_op<Scalar> ScalarConstantOp;
202 typedef scalar_product_op<Scalar, Scalar> ScalarTimesOp;
203 typedef CwiseBinaryOp<ScalarTimesOp, ScalarCwiseNullaryOp, NewCwiseBinaryOp>
204 ScalarTimesNewCwiseBinaryOp;
205 typedef typename traits<Lhs>::Scalar LhsScalar;
206 typedef typename traits<Rhs>::Scalar RhsScalar;
207 typedef add_assign_op<LhsScalar, RhsScalar> addAssignOp;
208
209 static_assert(
210 is_same<BinaryOp, scalar_sum_op<LhsScalar, RhsScalar>>::value ||
211 is_same<BinaryOp,
212 scalar_difference_op<LhsScalar, RhsScalar>>::value,
213 "Product of CwiseBinaryOp not defined for this BinaryOp");
214
215
1/2
✓ Branch 2 taken 67 times.
✗ Branch 3 not taken.
134 LeftProduct lprod(lhs.lhs(), rhs);
216
1/2
✓ Branch 2 taken 67 times.
✗ Branch 3 not taken.
134 RightProduct rprod(lhs.rhs(), rhs);
217
1/2
✓ Branch 2 taken 67 times.
✗ Branch 3 not taken.
134 NewCwiseBinaryOp xpr_prod(lprod, rprod, lhs.functor());
218
1/2
✓ Branch 3 taken 67 times.
✗ Branch 4 not taken.
134 ScalarCwiseNullaryOp xpr_scalar(lprod.rows(), lprod.cols(),
219 134 ScalarConstantOp(alpha));
220
1/2
✓ Branch 2 taken 67 times.
✗ Branch 3 not taken.
134 ScalarTimesNewCwiseBinaryOp xpr(xpr_scalar, xpr_prod, ScalarTimesOp());
221 Assignment<Dest, ScalarTimesNewCwiseBinaryOp, addAssignOp,
222
1/2
✓ Branch 2 taken 67 times.
✗ Branch 3 not taken.
134 Dense2Dense>::run(dst, xpr, addAssignOp());
223 134 }
224 };
225 /**
226 * \brief overwrite eigen implementation by using associative law to compute
227 *a*(H*x) instead of (a*H)*x
228 */
229 template <typename ProdLhsScalar, typename ProdRhsScalar, typename BinaryLhs,
230 typename BinaryRhs, typename Rhs, int ProductType>
231 struct generic_product_impl<
232 CwiseBinaryOp<scalar_product_op<ProdLhsScalar, ProdRhsScalar>, BinaryLhs,
233 BinaryRhs>,
234 Rhs, H2, DenseShape, ProductType>
235 : generic_product_impl_base<
236 CwiseBinaryOp<scalar_product_op<ProdLhsScalar, ProdRhsScalar>,
237 BinaryLhs, BinaryRhs>,
238 Rhs,
239 generic_product_impl<
240 CwiseBinaryOp<scalar_product_op<ProdLhsScalar, ProdRhsScalar>,
241 BinaryLhs, BinaryRhs>,
242 Rhs, H2, DenseShape, ProductType>> {
243 typedef CwiseBinaryOp<scalar_product_op<ProdLhsScalar, ProdRhsScalar>,
244 BinaryLhs, BinaryRhs>
245 Lhs;
246 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
247
248 // we only need to specify scaleAndAddTo, everything else is build on this in
249 // the background
250 template <typename Dest>
251 2 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
252 const Scalar& alpha) {
253 2 Scalar beta = lhs.lhs().functor()();
254 generic_product_impl<BinaryRhs, Rhs, H2, DenseShape,
255
1/2
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
2 ProductType>::scaleAndAddTo(dst, lhs.rhs(), rhs,
256
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 alpha * beta);
257 2 }
258 };
259 // same overwrite, but for const CwiseBinaryOp by inheritance from non-const
260 // version
261 template <typename BinaryOp, typename BinaryLhs, typename BinaryRhs,
262 typename Rhs, int ProductType>
263 struct generic_product_impl<const CwiseBinaryOp<BinaryOp, BinaryLhs, BinaryRhs>,
264 Rhs, H2, DenseShape, ProductType>
265 : generic_product_impl<CwiseBinaryOp<BinaryOp, BinaryLhs, BinaryRhs>, Rhs,
266 H2, DenseShape, ProductType> {};
267
268 template <typename Lhs, typename Rhs, int ProductType>
269 struct generic_product_impl<Lhs, Rhs, H2, DenseShape, ProductType>
270 : generic_product_impl_base<
271 Lhs, Rhs,
272 generic_product_impl<Lhs, Rhs, H2, DenseShape, ProductType>> {
273 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
274
275 // we only need to specify scaleAndAddTo, everything else is build on this in
276 // the background
277 template <typename Dest>
278 379 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
279 const Scalar& alpha) {
280 typedef
281 typename nested_eval<Lhs, ((Rhs::Flags & RowMajorBit) == 0)
282 ? 1
283 : Rhs::ColsAtCompileTime>::type LhsNested;
284 typedef typename nested_eval<
285 Rhs, ((Lhs::Flags & RowMajorBit) == 0) ? 1 : Dynamic>::type RhsNested;
286 379 LhsNested lhsNested(lhs);
287 379 RhsNested rhsNested(rhs);
288 379 internal::H2_time_dense_product(lhsNested, rhsNested, dst, alpha);
289 379 }
290 };
291
292 template <typename Lhs, typename Rhs, int Options, int ProductTag>
293 struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, H2, DenseShape>
294 : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject> {
295 typedef Product<Lhs, Rhs, Options> XprType;
296 typedef typename XprType::PlainObject PlainObject;
297 typedef evaluator<PlainObject> Base;
298
299 enum { Flags = Base::Flags };
300
301 208 explicit product_evaluator(const XprType& xpr)
302
1/2
✓ Branch 4 taken 106 times.
✗ Branch 5 not taken.
208 : m_result(xpr.rows(), xpr.cols()) {
303
1/2
✓ Branch 2 taken 106 times.
✗ Branch 3 not taken.
208 ::new (static_cast<Base*>(this)) Base(m_result);
304 208 generic_product_impl<Lhs, Rhs, H2, DenseShape, ProductTag>::evalTo(
305
1/2
✓ Branch 3 taken 106 times.
✗ Branch 4 not taken.
208 m_result, xpr.lhs(), xpr.rhs());
306 208 }
307
308 protected:
309 PlainObject m_result;
310 };
311
312 } // end namespace internal
313
314 } // end namespace Eigen
315
316 #endif // BEMBEL_SRC_H2MATRIX_EIGENHELPER_H2DENSEPRODUCT_HPP_
317