#define TENSOR_LOAD_IMPL
#include <cstdlib>
#include <iostream>
#include <tensor/tensor.h>
#include <tensor/io.h>
#include <tensor/tensor_lapack.h>
template<typename elt_t>
static void
do_foldin_into(Tensor<elt_t> &output, const Tensor<elt_t> &a, int _ndx1,
               const Tensor<elt_t> &b, int _ndx2)
{
  index i_len, j_len, k_len, l_len, m_len;
  index i, rank;
  const index ranka = a.rank();
  const index rankb = b.rank();
  index ndx1 = normalize_index(_ndx1, ranka);
  index ndx2 = normalize_index(_ndx2, rankb);
  Indices new_dims(ranka + rankb - 2);
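  /*
   * The output groups the dimensions as
   *    c(k, i, j, m) = sum_l a(i, l, j) * b(k, l, m)
   * where k runs over the indices of B before ndx2, i and j over the indices
   * of A before and after ndx1, and m over the indices of B after ndx2.
   * The loops below collect those dimensions and accumulate their total sizes.
   */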
  for (i = 0, rank = 0, k_len = 1; i < ndx2; i++) {
    index di = b.dimension(i);
    new_dims.at(rank++) = di;
    k_len *= di;
  }
  l_len = b.dimension(i++);
  for (i = 0, i_len = 1; i < ndx1; i++) {
    index di = a.dimension(i);
    new_dims.at(rank++) = di;
    i_len *= di;
  }
  if (l_len != a.dimension(i++)) {
    std::cerr << "Unable to foldin() tensors with dimensions" << std::endl
              << "\t" << a.dimensions() << " and " << b.dimensions() << std::endl
              << "\tbecause indices " << ndx1 << " and " << ndx2
              << " have different sizes" << std::endl;
    abort();
  }
  for (j_len = 1; i < ranka; i++) {
    index di = a.dimension(i);
    new_dims.at(rank++) = di;
    j_len *= di;
  }
  for (m_len = 1, i = ndx2 + 1; i < rankb; i++) {
    index di = b.dimension(i);
    new_dims.at(rank++) = di;
    m_len *= di;
  }
  output = Tensor<elt_t>(new_dims);
  if (output.size() == 0)
    return;
  elt_t *pC = output.begin();
  elt_t zero = number_zero<elt_t>();
  elt_t one = number_one<elt_t>();
  const elt_t *pA = a.begin();
  const elt_t *pB = b.begin();
  /* B blocks are used as stored; A blocks are transposed, so that each
     GEMM below computes C = B * A^T with the leading dimensions given. */
  char op1 = 'N';
  char op2 = 'T';
  index il_len = i_len * l_len;
  index kl_len = k_len * l_len;
  index ki_len = k_len * i_len;
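  /* For every pair (j, m), multiply the k_len x l_len block of B at offset
     kl_len*m by the transposed i_len x l_len block of A at offset il_len*j,
     writing a k_len x i_len block of the output at offset ki_len*(j + j_len*m). */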
  for (index m = 0; m < m_len; m++) {
    for (index j = 0; j < j_len; j++) {
      gemm(op1, op2, k_len, i_len, l_len, one,
           pB + kl_len * m, k_len, pA + il_len * j, i_len,
           zero, pC + ki_len * (j + j_len * m), k_len);
    }
  }
}
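
/* Example of the resulting shape (a sketch of the index bookkeeping, not a
 * statement about the public API): if a has dimensions (2, 3) and b has
 * dimensions (4, 3, 5), then do_foldin_into(c, a, 1, b, 1) contracts index 1
 * of a with index 1 of b (both of size 3) and produces c with dimensions
 * (4, 2, 5): the leading indices of b, then the remaining indices of a,
 * then the trailing indices of b.
 */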