summaryrefslogtreecommitdiff
blob: 45229469d2a41c536a90e5347f1c75ed3dddb0e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#define PBLAS_PREFIX p
#define PBLAS_FUNC(NAME) CAT(CAT(CAT(PBLAS_PREFIX, SCALAR_PREFIX),NAME),_)

template<> class pblas_interface<SCALAR> : public blacs_interface<SCALAR>
{
public:
  static inline std::string name()
  {
    return MAKE_STRING(PBLASNAME);
  }

  static inline void parallel_axpy(const SCALAR& coef,
      gene_vector& x, int *descX,
      gene_vector& y, int *descY,
      const int& size
      )
  {
    int iZERO = 0, iONE = 1;
    PBLAS_FUNC(axpy)(&size, &coef,
        x, &iONE, &iONE, descX, &iONE,
        y, &iONE, &iONE, descY, &iONE
    );
  }

  static inline void parallel_matrix_vector_product(
      int GlobalRows, int GlobalCols,
      gene_matrix& A, int *descA,
      gene_vector& x, int *descX,
      gene_vector& y, int *descY
      )
  {
    real_type alpha = 1., beta = 0.;
    int iONE = 1;
    int myid, procnum;
    const char notrans = 'N';
    blacs_pinfo_(&myid, &procnum);

    PBLAS_FUNC(gemv)(&notrans, &GlobalRows, &GlobalCols,
        &alpha, A, &iONE, &iONE, descA,
                x, &iONE, &iONE, descX, &iONE,
         &beta, y, &iONE, &iONE, descY, &iONE
    );

  }


  static inline void parallel_lu_decomp(gene_matrix& X, const int* desc)
  {
    const int GlobalRows = desc[2], GlobalCols = desc[3];
    const int iONE = 1;
    int info;
    std::vector<int> ipiv(desc[8] + desc[4]);
    PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc,
        &ipiv[0], &info);
  }

  static inline void parallel_cholesky(gene_matrix& X, const int* desc)
  {
    const int N = desc[2], iONE = 1;
    const char UPLO = 'U';
    int info;
    PBLAS_FUNC(potrf)(&UPLO, &N, X, &iONE, &iONE, desc, &info);
    if (info != 0)
      cerr << " { cholesky error : " << info << " } ";
  }

  static inline void parallel_qr_decomp(gene_matrix& X, const int* desc)
  {
    const int GlobalRows = desc[2], GlobalCols = desc[3],
              BlockRows = desc[4], BlockCols = desc[5],
              ctxt = desc[1];

    int myrow, mycol, nprow, npcol, lwork;
    SCALAR lworkd;
    blacs_gridinfo_(&ctxt, &nprow, &npcol, &myrow, &mycol);

    const int iONE = 1, iZERO = 0, imONE = -1,
             ipivdim = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &npcol);
    int info;
    std::vector<int> ipiv(ipivdim);
    std::vector<SCALAR> tau(ipivdim);

    // Retrieve LWORK
    PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &lworkd, &imONE, &info);
    lwork = static_cast<int>(lworkd);
//    if (info != 0)
//      cerr << " { qr_decomp lwork error } ";

    std::vector<SCALAR> work(lwork);
    PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &work[0], &lwork, &info);
//    if (info != 0)
//      cerr << " { qr_decomp computation error } ";
  }

  static inline void parallel_symm_ev(gene_matrix& A, const int* descA, gene_vector& w, gene_matrix& Z, const int* descZ)
  {
    const char jobz = 'V', uplo = 'u';
    const int N = descA[2], iONE = 1, iZERO = 0, imONE = -1;
    std::vector<SCALAR> work;
    std::vector<int> iwork;
    int lwork, liwork, info;
    SCALAR lworkd;

    // Retrieve l(i)work
    PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
        Z, &iONE, &iONE, descZ, &lworkd, &imONE, &liwork, &imONE, &info);
    lwork = static_cast<int>(lworkd);
    work.resize(lwork); iwork.resize(liwork);
//    if (info != 0)
//      cerr << " { symm_ev l(i)work error } ";

    PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
        Z, &iONE, &iONE, descZ, &work[0], &lwork, &iwork[0], &liwork, &info);
//    if (info != 0)
//      cerr << " { symm_ev computation error } ";
  }
};