blob: 16cc79930b76b4b8c3da86695f3348d526b81d9d [file] [log] [blame]
Tim Murray25207df2015-01-12 16:47:56 -08001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.renderscript;
18
19import android.annotation.IntDef;
20import java.lang.annotation.Retention;
21import java.lang.annotation.RetentionPolicy;
22
23/**
24 *
Miao Wangfb675a52015-05-12 18:22:20 -070025 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26 *
27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28 * building blocks for performing basic vector and matrix operations.
29 *
30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
Tim Murray25207df2015-01-12 16:47:56 -080031 *
Xusong Wang8b4548c2021-01-05 10:09:52 -080032 * @deprecated Renderscript has been deprecated in API level 31. Please refer to the <a
33 * href="https://developer.android.com/guide/topics/renderscript/migration-guide">migration
34 * guide</a> for the proposed alternatives.
Tim Murray25207df2015-01-12 16:47:56 -080035 **/
Xusong Wang8b4548c2021-01-05 10:09:52 -080036@Deprecated
Tim Murray25207df2015-01-12 16:47:56 -080037public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
38 private Allocation mLUT;
39
40 private ScriptIntrinsicBLAS(long id, RenderScript rs) {
41 super(id, rs);
42 }
43
44 private static final int RsBlas_sdsdot = 1;
45 private static final int RsBlas_dsdot = 2;
46 private static final int RsBlas_sdot = 3;
47 private static final int RsBlas_ddot = 4;
48 private static final int RsBlas_cdotu_sub = 5;
49 private static final int RsBlas_cdotc_sub = 6;
50 private static final int RsBlas_zdotu_sub = 7;
51 private static final int RsBlas_zdotc_sub = 8;
52 private static final int RsBlas_snrm2 = 9;
53 private static final int RsBlas_sasum = 10;
54 private static final int RsBlas_dnrm2 = 11;
55 private static final int RsBlas_dasum = 12;
56 private static final int RsBlas_scnrm2 = 13;
57 private static final int RsBlas_scasum = 14;
58 private static final int RsBlas_dznrm2 = 15;
59 private static final int RsBlas_dzasum = 16;
60 private static final int RsBlas_isamax = 17;
61 private static final int RsBlas_idamax = 18;
62 private static final int RsBlas_icamax = 19;
63 private static final int RsBlas_izamax = 20;
64 private static final int RsBlas_sswap = 21;
65 private static final int RsBlas_scopy = 22;
66 private static final int RsBlas_saxpy = 23;
67 private static final int RsBlas_dswap = 24;
68 private static final int RsBlas_dcopy = 25;
69 private static final int RsBlas_daxpy = 26;
70 private static final int RsBlas_cswap = 27;
71 private static final int RsBlas_ccopy = 28;
72 private static final int RsBlas_caxpy = 29;
73 private static final int RsBlas_zswap = 30;
74 private static final int RsBlas_zcopy = 31;
75 private static final int RsBlas_zaxpy = 32;
76 private static final int RsBlas_srotg = 33;
77 private static final int RsBlas_srotmg = 34;
78 private static final int RsBlas_srot = 35;
79 private static final int RsBlas_srotm = 36;
80 private static final int RsBlas_drotg = 37;
81 private static final int RsBlas_drotmg = 38;
82 private static final int RsBlas_drot = 39;
83 private static final int RsBlas_drotm = 40;
84 private static final int RsBlas_sscal = 41;
85 private static final int RsBlas_dscal = 42;
86 private static final int RsBlas_cscal = 43;
87 private static final int RsBlas_zscal = 44;
88 private static final int RsBlas_csscal = 45;
89 private static final int RsBlas_zdscal = 46;
90 private static final int RsBlas_sgemv = 47;
91 private static final int RsBlas_sgbmv = 48;
92 private static final int RsBlas_strmv = 49;
93 private static final int RsBlas_stbmv = 50;
94 private static final int RsBlas_stpmv = 51;
95 private static final int RsBlas_strsv = 52;
96 private static final int RsBlas_stbsv = 53;
97 private static final int RsBlas_stpsv = 54;
98 private static final int RsBlas_dgemv = 55;
99 private static final int RsBlas_dgbmv = 56;
100 private static final int RsBlas_dtrmv = 57;
101 private static final int RsBlas_dtbmv = 58;
102 private static final int RsBlas_dtpmv = 59;
103 private static final int RsBlas_dtrsv = 60;
104 private static final int RsBlas_dtbsv = 61;
105 private static final int RsBlas_dtpsv = 62;
106 private static final int RsBlas_cgemv = 63;
107 private static final int RsBlas_cgbmv = 64;
108 private static final int RsBlas_ctrmv = 65;
109 private static final int RsBlas_ctbmv = 66;
110 private static final int RsBlas_ctpmv = 67;
111 private static final int RsBlas_ctrsv = 68;
112 private static final int RsBlas_ctbsv = 69;
113 private static final int RsBlas_ctpsv = 70;
114 private static final int RsBlas_zgemv = 71;
115 private static final int RsBlas_zgbmv = 72;
116 private static final int RsBlas_ztrmv = 73;
117 private static final int RsBlas_ztbmv = 74;
118 private static final int RsBlas_ztpmv = 75;
119 private static final int RsBlas_ztrsv = 76;
120 private static final int RsBlas_ztbsv = 77;
121 private static final int RsBlas_ztpsv = 78;
122 private static final int RsBlas_ssymv = 79;
123 private static final int RsBlas_ssbmv = 80;
124 private static final int RsBlas_sspmv = 81;
125 private static final int RsBlas_sger = 82;
126 private static final int RsBlas_ssyr = 83;
127 private static final int RsBlas_sspr = 84;
128 private static final int RsBlas_ssyr2 = 85;
129 private static final int RsBlas_sspr2 = 86;
130 private static final int RsBlas_dsymv = 87;
131 private static final int RsBlas_dsbmv = 88;
132 private static final int RsBlas_dspmv = 89;
133 private static final int RsBlas_dger = 90;
134 private static final int RsBlas_dsyr = 91;
135 private static final int RsBlas_dspr = 92;
136 private static final int RsBlas_dsyr2 = 93;
137 private static final int RsBlas_dspr2 = 94;
138 private static final int RsBlas_chemv = 95;
139 private static final int RsBlas_chbmv = 96;
140 private static final int RsBlas_chpmv = 97;
141 private static final int RsBlas_cgeru = 98;
142 private static final int RsBlas_cgerc = 99;
143 private static final int RsBlas_cher = 100;
144 private static final int RsBlas_chpr = 101;
145 private static final int RsBlas_cher2 = 102;
146 private static final int RsBlas_chpr2 = 103;
147 private static final int RsBlas_zhemv = 104;
148 private static final int RsBlas_zhbmv = 105;
149 private static final int RsBlas_zhpmv = 106;
150 private static final int RsBlas_zgeru = 107;
151 private static final int RsBlas_zgerc = 108;
152 private static final int RsBlas_zher = 109;
153 private static final int RsBlas_zhpr = 110;
154 private static final int RsBlas_zher2 = 111;
155 private static final int RsBlas_zhpr2 = 112;
156 private static final int RsBlas_sgemm = 113;
157 private static final int RsBlas_ssymm = 114;
158 private static final int RsBlas_ssyrk = 115;
159 private static final int RsBlas_ssyr2k = 116;
160 private static final int RsBlas_strmm = 117;
161 private static final int RsBlas_strsm = 118;
162 private static final int RsBlas_dgemm = 119;
163 private static final int RsBlas_dsymm = 120;
164 private static final int RsBlas_dsyrk = 121;
165 private static final int RsBlas_dsyr2k = 122;
166 private static final int RsBlas_dtrmm = 123;
167 private static final int RsBlas_dtrsm = 124;
168 private static final int RsBlas_cgemm = 125;
169 private static final int RsBlas_csymm = 126;
170 private static final int RsBlas_csyrk = 127;
171 private static final int RsBlas_csyr2k = 128;
172 private static final int RsBlas_ctrmm = 129;
173 private static final int RsBlas_ctrsm = 130;
174 private static final int RsBlas_zgemm = 131;
175 private static final int RsBlas_zsymm = 132;
176 private static final int RsBlas_zsyrk = 133;
177 private static final int RsBlas_zsyr2k = 134;
178 private static final int RsBlas_ztrmm = 135;
179 private static final int RsBlas_ztrsm = 136;
180 private static final int RsBlas_chemm = 137;
181 private static final int RsBlas_cherk = 138;
182 private static final int RsBlas_cher2k = 139;
183 private static final int RsBlas_zhemm = 140;
184 private static final int RsBlas_zherk = 141;
185 private static final int RsBlas_zher2k = 142;
186
Tim Murray9cb16a22015-04-01 11:07:16 -0700187 // BLAS extensions start here
188 private static final int RsBlas_bnnm = 1000;
189
Tim Murray25207df2015-01-12 16:47:56 -0800190 /**
Miao Wangfb675a52015-05-12 18:22:20 -0700191 * Create an intrinsic to access BLAS subroutines.
192 *
193 * @param rs The RenderScript context
194 * @return ScriptIntrinsicBLAS
Tim Murray25207df2015-01-12 16:47:56 -0800195 */
196 public static ScriptIntrinsicBLAS create(RenderScript rs) {
197 long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs));
198 return new ScriptIntrinsicBLAS(id, rs);
199 }
200
Miao Wangfb675a52015-05-12 18:22:20 -0700201 /**
202 * @hide
203 */
Tim Murray25207df2015-01-12 16:47:56 -0800204 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
205 @Retention(RetentionPolicy.SOURCE)
206 public @interface Transpose {}
207
Miao Wangfb675a52015-05-12 18:22:20 -0700208 /**
209 * @hide
210 */
Tim Murray25207df2015-01-12 16:47:56 -0800211 @IntDef({UPPER, LOWER})
212 @Retention(RetentionPolicy.SOURCE)
213 public @interface Uplo {}
214
Miao Wangfb675a52015-05-12 18:22:20 -0700215 /**
216 * @hide
217 */
Tim Murray25207df2015-01-12 16:47:56 -0800218 @IntDef({NON_UNIT, UNIT})
219 @Retention(RetentionPolicy.SOURCE)
220 public @interface Diag {}
221
Miao Wangfb675a52015-05-12 18:22:20 -0700222 /**
223 * @hide
224 */
Tim Murray25207df2015-01-12 16:47:56 -0800225 @IntDef({LEFT, RIGHT})
226 @Retention(RetentionPolicy.SOURCE)
227 public @interface Side {}
228
229 public static final int NO_TRANSPOSE = 111;
230 public static final int TRANSPOSE = 112;
231 public static final int CONJ_TRANSPOSE = 113;
232
233 public static final int UPPER = 121;
234 public static final int LOWER = 122;
235
236 public static final int NON_UNIT = 131;
237 public static final int UNIT = 132;
238
239 public static final int LEFT = 141;
240 public static final int RIGHT = 142;
241
242 static void validateSide(@Side int Side) {
243 if (Side != LEFT && Side != RIGHT) {
244 throw new RSRuntimeException("Invalid side passed to BLAS");
245 }
246 }
247
248 static void validateTranspose(@Transpose int Trans) {
249 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
250 Trans != CONJ_TRANSPOSE) {
251 throw new RSRuntimeException("Invalid transpose passed to BLAS");
252 }
253 }
254
255 static void validateConjTranspose(@Transpose int Trans) {
256 if (Trans != NO_TRANSPOSE &&
257 Trans != CONJ_TRANSPOSE) {
258 throw new RSRuntimeException("Invalid transpose passed to BLAS");
259 }
260 }
261
262 static void validateDiag(@Diag int Diag) {
263 if (Diag != NON_UNIT && Diag != UNIT) {
264 throw new RSRuntimeException("Invalid diag passed to BLAS");
265 }
266 }
267
268 static void validateUplo(@Uplo int Uplo) {
Miao Wang37ae07c2015-04-24 11:19:53 -0700269 if (Uplo != UPPER && Uplo != LOWER) {
Tim Murray25207df2015-01-12 16:47:56 -0800270 throw new RSRuntimeException("Invalid uplo passed to BLAS");
271 }
272 }
273
274
275 /**
276 * Level 2 BLAS
277 */
278
279 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
280 validateTranspose(TransA);
281 int M = A.getType().getY();
282 int N = A.getType().getX();
283 if (!A.getType().getElement().isCompatible(e) ||
284 !X.getType().getElement().isCompatible(e) ||
285 !Y.getType().getElement().isCompatible(e)) {
286 throw new RSRuntimeException("Called BLAS with wrong Element type");
287 }
288 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
289 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
290 }
291
292 if (incX <= 0 || incY <= 0) {
293 throw new RSRuntimeException("Vector increments must be greater than 0");
294 }
295 int expectedXDim = -1, expectedYDim = -1;
296 if (TransA == NO_TRANSPOSE) {
297 expectedXDim = 1 + (N - 1) * incX;
298 expectedYDim = 1 + (M - 1) * incY;
299 } else {
300 expectedXDim = 1 + (M - 1) * incX;
301 expectedYDim = 1 + (N - 1) * incY;
302 }
303 if (X.getType().getX() != expectedXDim ||
Miao Wang68ca43e2015-04-23 15:06:09 -0700304 Y.getType().getX() != expectedYDim) {
Tim Murray25207df2015-01-12 16:47:56 -0800305 throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
306 }
307 }
Miao Wangfb675a52015-05-12 18:22:20 -0700308
309 /**
310 * SGEMV performs one of the matrix-vector operations
311 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
312 *
313 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
314 *
315 * @param TransA The type of transpose applied to matrix A.
316 * @param alpha The scalar alpha.
317 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
318 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
319 * @param incX The increment for the elements of vector x, must be larger than zero.
320 * @param beta The scalar beta.
321 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
322 * @param incY The increment for the elements of vector y, must be larger than zero.
323 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700324 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800325 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
326 int M = A.getType().getY();
327 int N = A.getType().getX();
328 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
329 }
Miao Wangfb675a52015-05-12 18:22:20 -0700330
331 /**
332 * DGEMV performs one of the matrix-vector operations
333 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
334 *
335 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
336 *
337 * @param TransA The type of transpose applied to matrix A.
338 * @param alpha The scalar alpha.
339 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
340 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
341 * @param incX The increment for the elements of vector x, must be larger than zero.
342 * @param beta The scalar beta.
343 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
344 * @param incY The increment for the elements of vector y, must be larger than zero.
345 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700346 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800347 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
348 int M = A.getType().getY();
349 int N = A.getType().getX();
350 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
351 }
Miao Wangfb675a52015-05-12 18:22:20 -0700352
353 /**
354 * CGEMV performs one of the matrix-vector operations
355 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
356 *
357 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
358 *
359 * @param TransA The type of transpose applied to matrix A.
360 * @param alpha The scalar alpha.
361 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
362 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
363 * @param incX The increment for the elements of vector x, must be larger than zero.
364 * @param beta The scalar beta.
365 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
366 * @param incY The increment for the elements of vector y, must be larger than zero.
367 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700368 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800369 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
370 int M = A.getType().getY();
371 int N = A.getType().getX();
372 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
373 }
Miao Wangfb675a52015-05-12 18:22:20 -0700374
375 /**
376 * ZGEMV performs one of the matrix-vector operations
377 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
378 *
379 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
380 *
381 * @param TransA The type of transpose applied to matrix A.
382 * @param alpha The scalar alpha.
383 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
384 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
385 * @param incX The increment for the elements of vector x, must be larger than zero.
386 * @param beta The scalar beta.
387 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
388 * @param incY The increment for the elements of vector y, must be larger than zero.
389 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700390 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800391 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
392 int M = A.getType().getY();
393 int N = A.getType().getX();
394 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
395 }
396
Miao Wangfb675a52015-05-12 18:22:20 -0700397 /**
398 * SGBMV performs one of the matrix-vector operations
399 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
400 *
401 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
402 *
403 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
404 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
405 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
406 * for i in range(0, m):
407 * for j in range(max(0, i-kl), min(i+ku+1, n)):
408 * b[i, j-i+kl] = a[i, j]
409 *
410 * @param TransA The type of transpose applied to matrix A.
411 * @param KL The number of sub-diagonals of the matrix A.
412 * @param KU The number of super-diagonals of the matrix A.
413 * @param alpha The scalar alpha.
414 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
415 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
416 * @param incX The increment for the elements of vector x, must be larger than zero.
417 * @param beta The scalar beta.
418 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
419 * @param incY The increment for the elements of vector y, must be larger than zero.
420 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700421 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800422 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
423 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
424 if (KL < 0 || KU < 0) {
425 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
426 }
427 int M = A.getType().getY();
428 int N = A.getType().getX();
429 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
430 }
Miao Wangfb675a52015-05-12 18:22:20 -0700431
432 /**
433 * DGBMV performs one of the matrix-vector operations
434 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
435 *
436 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
437 *
438 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
439 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
440 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
441 * for i in range(0, m):
442 * for j in range(max(0, i-kl), min(i+ku+1, n)):
443 * b[i, j-i+kl] = a[i, j]
444 *
445 * @param TransA The type of transpose applied to matrix A.
446 * @param KL The number of sub-diagonals of the matrix A.
447 * @param KU The number of super-diagonals of the matrix A.
448 * @param alpha The scalar alpha.
449 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
450 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
451 * @param incX The increment for the elements of vector x, must be larger than zero.
452 * @param beta The scalar beta.
453 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
454 * @param incY The increment for the elements of vector y, must be larger than zero.
455 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700456 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800457 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
458 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
459 if (KL < 0 || KU < 0) {
460 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
461 }
462 int M = A.getType().getY();
463 int N = A.getType().getX();
464 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
465 }
Miao Wangfb675a52015-05-12 18:22:20 -0700466
467 /**
468 * CGBMV performs one of the matrix-vector operations
469 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
470 *
471 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
472 *
473 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
474 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
475 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
476 * for i in range(0, m):
477 * for j in range(max(0, i-kl), min(i+ku+1, n)):
478 * b[i, j-i+kl] = a[i, j]
479 *
480 * @param TransA The type of transpose applied to matrix A.
481 * @param KL The number of sub-diagonals of the matrix A.
482 * @param KU The number of super-diagonals of the matrix A.
483 * @param alpha The scalar alpha.
484 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
485 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
486 * @param incX The increment for the elements of vector x, must be larger than zero.
487 * @param beta The scalar beta.
488 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
489 * @param incY The increment for the elements of vector y, must be larger than zero.
490 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700491 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800492 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
493 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
494 if (KL < 0 || KU < 0) {
495 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
496 }
497 int M = A.getType().getY();
498 int N = A.getType().getX();
499 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
500 }
Miao Wangfb675a52015-05-12 18:22:20 -0700501
502 /**
503 * ZGBMV performs one of the matrix-vector operations
504 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
505 *
506 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
507 *
508 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
509 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
510 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
511 * for i in range(0, m):
512 * for j in range(max(0, i-kl), min(i+ku+1, n)):
513 * b[i, j-i+kl] = a[i, j]
514 *
515 * @param TransA The type of transpose applied to matrix A.
516 * @param KL The number of sub-diagonals of the matrix A.
517 * @param KU The number of super-diagonals of the matrix A.
518 * @param alpha The scalar alpha.
519 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
520 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
521 * @param incX The increment for the elements of vector x, must be larger than zero.
522 * @param beta The scalar beta.
523 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
524 * @param incY The increment for the elements of vector y, must be larger than zero.
525 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700526 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800527 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
528 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
529 if (KL < 0 || KU < 0) {
530 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
531 }
532 int M = A.getType().getY();
533 int N = A.getType().getX();
534 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
535 }
536
Miao Wang68ca43e2015-04-23 15:06:09 -0700537 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800538 validateTranspose(TransA);
Miao Wang68ca43e2015-04-23 15:06:09 -0700539 validateUplo(Uplo);
540 validateDiag(Diag);
Tim Murray25207df2015-01-12 16:47:56 -0800541 int N = A.getType().getY();
542 if (A.getType().getX() != N) {
543 throw new RSRuntimeException("A must be a square matrix for TRMV");
544 }
545 if (!A.getType().getElement().isCompatible(e) ||
546 !X.getType().getElement().isCompatible(e)) {
547 throw new RSRuntimeException("Called BLAS with wrong Element type");
548 }
549 if (X.getType().getY() > 1) {
550 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
551 }
552
553 if (incX <= 0) {
554 throw new RSRuntimeException("Vector increments must be greater than 0");
555 }
556 int expectedXDim = 1 + (N - 1) * incX;
557 if (X.getType().getX() != expectedXDim) {
558 throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
559 }
560 }
561
562 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
563 validateTranspose(TransA);
564 validateUplo(Uplo);
565 validateDiag(Diag);
566 if (!Ap.getType().getElement().isCompatible(e) ||
567 !X.getType().getElement().isCompatible(e)) {
568 throw new RSRuntimeException("Called BLAS with wrong Element type");
569 }
570 if (X.getType().getY() > 1) {
571 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
572 }
573
574 if (Ap.getType().getY() > 1) {
575 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
576 }
577
578 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
Miao Wang68ca43e2015-04-23 15:06:09 -0700579 //is it really doing anything?
Tim Murray25207df2015-01-12 16:47:56 -0800580 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
581 throw new RSRuntimeException("Invalid dimension for Ap");
582 }
Miao Wang68ca43e2015-04-23 15:06:09 -0700583 if (incX <= 0) {
584 throw new RSRuntimeException("Vector increments must be greater than 0");
585 }
Tim Murray25207df2015-01-12 16:47:56 -0800586 int expectedXDim = 1 + (N - 1) * incX;
587 if (X.getType().getX() != expectedXDim) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700588 throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
Tim Murray25207df2015-01-12 16:47:56 -0800589 }
590
591 return N;
592 }
593
Miao Wangfb675a52015-05-12 18:22:20 -0700594 /**
595 * STRMV performs one of the matrix-vector operations
596 * x := A*x or x := A**T*x
597 *
598 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
599 *
600 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
601 * @param TransA The type of transpose applied to matrix A.
602 * @param Diag Specifies whether or not A is unit triangular.
603 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
604 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
605 * @param incX The increment for the elements of vector x, must be larger than zero.
606 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700607 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700608 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800609 int N = A.getType().getY();
610 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
611 }
Miao Wangfb675a52015-05-12 18:22:20 -0700612
613 /**
614 * DTRMV performs one of the matrix-vector operations
615 * x := A*x or x := A**T*x
616 *
617 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
618 *
619 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
620 * @param TransA The type of transpose applied to matrix A.
621 * @param Diag Specifies whether or not A is unit triangular.
622 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
623 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
624 * @param incX The increment for the elements of vector x, must be larger than zero.
625 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700626 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700627 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800628 int N = A.getType().getY();
629 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
630 }
Miao Wangfb675a52015-05-12 18:22:20 -0700631
632 /**
633 * CTRMV performs one of the matrix-vector operations
634 * x := A*x or x := A**T*x or x := A**H*x
635 *
636 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
637 *
638 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
639 * @param TransA The type of transpose applied to matrix A.
640 * @param Diag Specifies whether or not A is unit triangular.
641 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
642 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
643 * @param incX The increment for the elements of vector x, must be larger than zero.
644 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700645 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700646 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800647 int N = A.getType().getY();
648 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
649 }
Miao Wangfb675a52015-05-12 18:22:20 -0700650
651 /**
652 * ZTRMV performs one of the matrix-vector operations
653 * x := A*x or x := A**T*x or x := A**H*x
654 *
655 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
656 *
657 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
658 * @param TransA The type of transpose applied to matrix A.
659 * @param Diag Specifies whether or not A is unit triangular.
660 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
661 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
662 * @param incX The increment for the elements of vector x, must be larger than zero.
663 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700664 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700665 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800666 int N = A.getType().getY();
667 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
668 }
Miao Wang68ca43e2015-04-23 15:06:09 -0700669
Miao Wangfb675a52015-05-12 18:22:20 -0700670 /**
671 * STBMV performs one of the matrix-vector operations
672 * x := A*x or x := A**T*x
673 *
674 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
675 *
676 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
677 * but only the region N*(K+1) will be referenced. The following subroutine can is an
678 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
679 * for i in range(0, n):
680 * for j in range(i, min(i+k+1, n)):
681 * b[i, j-i] = a[i, j]
682 *
683 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
684 * @param TransA The type of transpose applied to matrix A.
685 * @param Diag Specifies whether or not A is unit triangular.
686 * @param K The number of off-diagonals of the matrix A
687 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
688 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
689 * @param incX The increment for the elements of vector x, must be larger than zero.
690 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700691 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700692 // TBMV has the same requirements as TRMV + K >= 0
693 if (K < 0) {
694 throw new RSRuntimeException("K must be greater than or equal to 0");
695 }
696 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800697 int N = A.getType().getY();
698 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
699 }
Miao Wangfb675a52015-05-12 18:22:20 -0700700
701 /**
702 * DTBMV performs one of the matrix-vector operations
703 * x := A*x or x := A**T*x
704 *
705 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
706 *
707 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
708 * but only the region N*(K+1) will be referenced. The following subroutine can is an
709 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
710 * for i in range(0, n):
711 * for j in range(i, min(i+k+1, n)):
712 * b[i, j-i] = a[i, j]
713 *
714 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
715 * @param TransA The type of transpose applied to matrix A.
716 * @param Diag Specifies whether or not A is unit triangular.
717 * @param K The number of off-diagonals of the matrix A
718 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
719 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
720 * @param incX The increment for the elements of vector x, must be larger than zero.
721 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700722 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700723 // TBMV has the same requirements as TRMV + K >= 0
724 if (K < 0) {
725 throw new RSRuntimeException("K must be greater than or equal to 0");
726 }
727 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800728 int N = A.getType().getY();
729 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
730 }
Miao Wangfb675a52015-05-12 18:22:20 -0700731
732 /**
733 * CTBMV performs one of the matrix-vector operations
734 * x := A*x or x := A**T*x or x := A**H*x
735 *
736 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
737 *
738 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
739 * but only the region N*(K+1) will be referenced. The following subroutine can is an
740 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
741 * for i in range(0, n):
742 * for j in range(i, min(i+k+1, n)):
743 * b[i, j-i] = a[i, j]
744 *
745 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
746 * @param TransA The type of transpose applied to matrix A.
747 * @param Diag Specifies whether or not A is unit triangular.
748 * @param K The number of off-diagonals of the matrix A
749 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
750 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
751 * @param incX The increment for the elements of vector x, must be larger than zero.
752 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700753 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700754 // TBMV has the same requirements as TRMV + K >= 0
755 if (K < 0) {
756 throw new RSRuntimeException("K must be greater than or equal to 0");
757 }
758 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800759 int N = A.getType().getY();
760 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
761 }
Miao Wangfb675a52015-05-12 18:22:20 -0700762
763 /**
764 * ZTBMV performs one of the matrix-vector operations
765 * x := A*x or x := A**T*x or x := A**H*x
766 *
767 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
768 *
769 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
770 * but only the region N*(K+1) will be referenced. The following subroutine can is an
771 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
772 * for i in range(0, n):
773 * for j in range(i, min(i+k+1, n)):
774 * b[i, j-i] = a[i, j]
775 *
776 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
777 * @param TransA The type of transpose applied to matrix A.
778 * @param Diag Specifies whether or not A is unit triangular.
779 * @param K The number of off-diagonals of the matrix A
780 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
781 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
782 * @param incX The increment for the elements of vector x, must be larger than zero.
783 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700784 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700785 // TBMV has the same requirements as TRMV + K >= 0
786 if (K < 0) {
787 throw new RSRuntimeException("K must be greater than or equal to 0");
788 }
789 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800790 int N = A.getType().getY();
791 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
792 }
Miao Wangfb675a52015-05-12 18:22:20 -0700793
794 /**
795 * STPMV performs one of the matrix-vector operations
796 * x := A*x or x := A**T*x
797 *
798 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
799 *
800 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
801 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
802 * 'a' to packed matrix 'b'.
803 * k = 0
804 * for i in range(0, n):
805 * for j in range(i, n):
806 * b[k++] = a[i, j]
807 *
808 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
809 * @param TransA The type of transpose applied to matrix A.
810 * @param Diag Specifies whether or not A is unit triangular.
811 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
812 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
813 * @param incX The increment for the elements of vector x, must be larger than zero.
814 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700815 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800816 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
817 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
818 }
Miao Wangfb675a52015-05-12 18:22:20 -0700819
820 /**
821 * DTPMV performs one of the matrix-vector operations
822 * x := A*x or x := A**T*x
823 *
824 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
825 *
826 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
827 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
828 * 'a' to packed matrix 'b'.
829 * k = 0
830 * for i in range(0, n):
831 * for j in range(i, n):
832 * b[k++] = a[i, j]
833 *
834 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
835 * @param TransA The type of transpose applied to matrix A.
836 * @param Diag Specifies whether or not A is unit triangular.
837 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
838 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
839 * @param incX The increment for the elements of vector x, must be larger than zero.
840 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700841 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800842 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
843 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
844 }
Miao Wangfb675a52015-05-12 18:22:20 -0700845
846 /**
847 * CTPMV performs one of the matrix-vector operations
848 * x := A*x or x := A**T*x or x := A**H*x
849 *
850 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
851 *
852 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
853 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
854 * 'a' to packed matrix 'b'.
855 * k = 0
856 * for i in range(0, n):
857 * for j in range(i, n):
858 * b[k++] = a[i, j]
859 *
860 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
861 * @param TransA The type of transpose applied to matrix A.
862 * @param Diag Specifies whether or not A is unit triangular.
863 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
864 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
865 * @param incX The increment for the elements of vector x, must be larger than zero.
866 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700867 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800868 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
869 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
870 }
Miao Wangfb675a52015-05-12 18:22:20 -0700871
872 /**
873 * ZTPMV performs one of the matrix-vector operations
874 * x := A*x or x := A**T*x or x := A**H*x
875 *
876 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
877 *
878 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
879 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
880 * 'a' to packed matrix 'b'.
881 * k = 0
882 * for i in range(0, n):
883 * for j in range(i, n):
884 * b[k++] = a[i, j]
885 *
886 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
887 * @param TransA The type of transpose applied to matrix A.
888 * @param Diag Specifies whether or not A is unit triangular.
889 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
890 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
891 * @param incX The increment for the elements of vector x, must be larger than zero.
892 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700893 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800894 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
895 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
896 }
Miao Wangfb675a52015-05-12 18:22:20 -0700897
898 /**
899 * STRSV solves one of the systems of equations
900 * A*x = b or A**T*x = b
901 *
902 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
903 *
904 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
905 * @param TransA The type of transpose applied to matrix A.
906 * @param Diag Specifies whether or not A is unit triangular.
907 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
908 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
909 * @param incX The increment for the elements of vector x, must be larger than zero.
910 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700911 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800912 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700913 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800914 int N = A.getType().getY();
915 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
916
917 }
Miao Wangfb675a52015-05-12 18:22:20 -0700918
919 /**
920 * DTRSV solves one of the systems of equations
921 * A*x = b or A**T*x = b
922 *
923 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
924 *
925 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
926 * @param TransA The type of transpose applied to matrix A.
927 * @param Diag Specifies whether or not A is unit triangular.
928 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
929 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
930 * @param incX The increment for the elements of vector x, must be larger than zero.
931 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700932 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800933 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700934 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800935 int N = A.getType().getY();
936 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
937
938 }
Miao Wangfb675a52015-05-12 18:22:20 -0700939
940 /**
941 * CTRSV solves one of the systems of equations
942 * A*x = b or A**T*x = b or A**H*x = b
943 *
944 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
945 *
946 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
947 * @param TransA The type of transpose applied to matrix A.
948 * @param Diag Specifies whether or not A is unit triangular.
949 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
950 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
951 * @param incX The increment for the elements of vector x, must be larger than zero.
952 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700953 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800954 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700955 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800956 int N = A.getType().getY();
957 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
958
959 }
Miao Wangfb675a52015-05-12 18:22:20 -0700960
961 /**
962 * ZTRSV solves one of the systems of equations
963 * A*x = b or A**T*x = b or A**H*x = b
964 *
965 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
966 *
967 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
968 * @param TransA The type of transpose applied to matrix A.
969 * @param Diag Specifies whether or not A is unit triangular.
970 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
971 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
972 * @param incX The increment for the elements of vector x, must be larger than zero.
973 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700974 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800975 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700976 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800977 int N = A.getType().getY();
978 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
979
980 }
Miao Wangfb675a52015-05-12 18:22:20 -0700981
982 /**
983 * STBSV solves one of the systems of equations
984 * A*x = b or A**T*x = b
985 *
986 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
987 *
988 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
989 * but only the region N*(K+1) will be referenced. The following subroutine can is an
990 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
991 * for i in range(0, n):
992 * for j in range(i, min(i+k+1, n)):
993 * b[i, j-i] = a[i, j]
994 *
995 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
996 * @param TransA The type of transpose applied to matrix A.
997 * @param Diag Specifies whether or not A is unit triangular.
998 * @param K The number of off-diagonals of the matrix A
999 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1000 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1001 * @param incX The increment for the elements of vector x, must be larger than zero.
1002 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001003 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001004 // TBSV is the same as TRMV + K >= 0
1005 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001006 int N = A.getType().getY();
1007 if (K < 0) {
1008 throw new RSRuntimeException("Number of diagonals must be positive");
1009 }
1010 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1011 }
Miao Wangfb675a52015-05-12 18:22:20 -07001012
1013 /**
1014 * DTBSV solves one of the systems of equations
1015 * A*x = b or A**T*x = b
1016 *
1017 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1018 *
1019 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1020 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1021 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1022 * for i in range(0, n):
1023 * for j in range(i, min(i+k+1, n)):
1024 * b[i, j-i] = a[i, j]
1025 *
1026 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1027 * @param TransA The type of transpose applied to matrix A.
1028 * @param Diag Specifies whether or not A is unit triangular.
1029 * @param K The number of off-diagonals of the matrix A
1030 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1031 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1032 * @param incX The increment for the elements of vector x, must be larger than zero.
1033 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001034 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001035 // TBSV is the same as TRMV + K >= 0
1036 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001037 int N = A.getType().getY();
1038 if (K < 0) {
1039 throw new RSRuntimeException("Number of diagonals must be positive");
1040 }
1041 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1042 }
Miao Wangfb675a52015-05-12 18:22:20 -07001043
1044 /**
1045 * CTBSV solves one of the systems of equations
1046 * A*x = b or A**T*x = b or A**H*x = b
1047 *
1048 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1049 *
1050 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1051 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1052 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1053 * for i in range(0, n):
1054 * for j in range(i, min(i+k+1, n)):
1055 * b[i, j-i] = a[i, j]
1056 *
1057 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1058 * @param TransA The type of transpose applied to matrix A.
1059 * @param Diag Specifies whether or not A is unit triangular.
1060 * @param K The number of off-diagonals of the matrix A
1061 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1062 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1063 * @param incX The increment for the elements of vector x, must be larger than zero.
1064 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001065 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001066 // TBSV is the same as TRMV + K >= 0
1067 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001068 int N = A.getType().getY();
1069 if (K < 0) {
1070 throw new RSRuntimeException("Number of diagonals must be positive");
1071 }
1072 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1073 }
Miao Wangfb675a52015-05-12 18:22:20 -07001074
1075 /**
1076 * ZTBSV solves one of the systems of equations
1077 * A*x = b or A**T*x = b or A**H*x = b
1078 *
1079 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1080 *
1081 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1082 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1083 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1084 * for i in range(0, n):
1085 * for j in range(i, min(i+k+1, n)):
1086 * b[i, j-i] = a[i, j]
1087 *
1088 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1089 * @param TransA The type of transpose applied to matrix A.
1090 * @param Diag Specifies whether or not A is unit triangular.
1091 * @param K The number of off-diagonals of the matrix A
1092 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1093 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1094 * @param incX The increment for the elements of vector x, must be larger than zero.
1095 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001096 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001097 // TBSV is the same as TRMV + K >= 0
1098 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001099 int N = A.getType().getY();
1100 if (K < 0) {
1101 throw new RSRuntimeException("Number of diagonals must be positive");
1102 }
1103 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1104 }
Miao Wangfb675a52015-05-12 18:22:20 -07001105
1106 /**
1107 * STPSV solves one of the systems of equations
1108 * A*x = b or A**T*x = b
1109 *
1110 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1111 *
1112 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1113 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1114 * 'a' to packed matrix 'b'.
1115 * k = 0
1116 * for i in range(0, n):
1117 * for j in range(i, n):
1118 * b[k++] = a[i, j]
1119 *
1120 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1121 * @param TransA The type of transpose applied to matrix A.
1122 * @param Diag Specifies whether or not A is unit triangular.
1123 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1124 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1125 * @param incX The increment for the elements of vector x, must be larger than zero.
1126 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001127 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001128 // TPSV is same as TPMV
1129 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1130 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1131 }
Miao Wangfb675a52015-05-12 18:22:20 -07001132
1133 /**
1134 * DTPSV solves one of the systems of equations
1135 * A*x = b or A**T*x = b
1136 *
1137 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1138 *
1139 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1140 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1141 * 'a' to packed matrix 'b'.
1142 * k = 0
1143 * for i in range(0, n):
1144 * for j in range(i, n):
1145 * b[k++] = a[i, j]
1146 *
1147 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1148 * @param TransA The type of transpose applied to matrix A.
1149 * @param Diag Specifies whether or not A is unit triangular.
1150 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1151 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1152 * @param incX The increment for the elements of vector x, must be larger than zero.
1153 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001154 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001155 // TPSV is same as TPMV
1156 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1157 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1158 }
Miao Wangfb675a52015-05-12 18:22:20 -07001159
1160 /**
1161 * CTPSV solves one of the systems of equations
1162 * A*x = b or A**T*x = b or A**H*x = b
1163 *
1164 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1165 *
1166 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1167 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1168 * 'a' to packed matrix 'b'.
1169 * k = 0
1170 * for i in range(0, n):
1171 * for j in range(i, n):
1172 * b[k++] = a[i, j]
1173 *
1174 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1175 * @param TransA The type of transpose applied to matrix A.
1176 * @param Diag Specifies whether or not A is unit triangular.
1177 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1178 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1179 * @param incX The increment for the elements of vector x, must be larger than zero.
1180 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001181 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001182 // TPSV is same as TPMV
1183 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1184 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1185 }
Miao Wangfb675a52015-05-12 18:22:20 -07001186
1187 /**
1188 * ZTPSV solves one of the systems of equations
1189 * A*x = b or A**T*x = b or A**H*x = b
1190 *
1191 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1192 *
1193 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1194 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1195 * 'a' to packed matrix 'b'.
1196 * k = 0
1197 * for i in range(0, n):
1198 * for j in range(i, n):
1199 * b[k++] = a[i, j]
1200 *
1201 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1202 * @param TransA The type of transpose applied to matrix A.
1203 * @param Diag Specifies whether or not A is unit triangular.
1204 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1205 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1206 * @param incX The increment for the elements of vector x, must be larger than zero.
1207 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001208 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001209 // TPSV is same as TPMV
1210 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1211 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1212 }
1213
1214 /**
1215 * Level 2, S and D only
1216 */
1217 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1218 validateUplo(Uplo);
1219 int N = A.getType().getY();
1220 if (A.getType().getX() != N) {
1221 throw new RSRuntimeException("A must be a square matrix for SYMV");
1222 }
1223 if (!A.getType().getElement().isCompatible(e) ||
1224 !X.getType().getElement().isCompatible(e) ||
1225 !Y.getType().getElement().isCompatible(e) ) {
1226 throw new RSRuntimeException("Called BLAS with wrong Element type");
1227 }
1228 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1229 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1230 }
1231
1232 if (incX <= 0 || incY <= 0) {
1233 throw new RSRuntimeException("Vector increments must be greater than 0");
1234 }
1235 int expectedXDim = 1 + (N - 1) * incX;
1236 if (X.getType().getX() != expectedXDim) {
1237 throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1238 }
1239 int expectedYDim = 1 + (N - 1) * incY;
1240 if (Y.getType().getX() != expectedYDim) {
1241 throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1242 }
1243 return N;
1244 }
1245 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1246 validateUplo(Uplo);
1247 if (!Ap.getType().getElement().isCompatible(e) ||
1248 !X.getType().getElement().isCompatible(e) ||
1249 !Y.getType().getElement().isCompatible(e)) {
1250 throw new RSRuntimeException("Called BLAS with wrong Element type");
1251 }
1252 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1253 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1254 }
1255
1256 if (Ap.getType().getY() > 1) {
1257 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1258 }
1259
1260 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1261 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1262 throw new RSRuntimeException("Invalid dimension for Ap");
1263 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001264 if (incX <= 0 || incY <= 0) {
1265 throw new RSRuntimeException("Vector increments must be greater than 0");
1266 }
Tim Murray25207df2015-01-12 16:47:56 -08001267 int expectedXDim = 1 + (N - 1) * incX;
1268 if (X.getType().getX() != expectedXDim) {
1269 throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1270 }
1271 int expectedYDim = 1 + (N - 1) * incY;
1272 if (Y.getType().getX() != expectedYDim) {
1273 throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1274 }
1275
1276 return N;
1277 }
1278 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1279 if (!A.getType().getElement().isCompatible(e) ||
1280 !X.getType().getElement().isCompatible(e) ||
1281 !Y.getType().getElement().isCompatible(e) ) {
1282 throw new RSRuntimeException("Called BLAS with wrong Element type");
1283 }
1284
1285 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1286 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1287 }
1288
1289 int M = A.getType().getY();
1290 int N = A.getType().getX();
1291
1292 if (N < 1 || M < 1) {
1293 throw new RSRuntimeException("M and N must be 1 or greater for GER");
1294 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001295 if (incX <= 0 || incY <= 0) {
1296 throw new RSRuntimeException("Vector increments must be greater than 0");
1297 }
1298 int expectedXDim = 1 + (M - 1) * incX;
Tim Murray25207df2015-01-12 16:47:56 -08001299 if (X.getType().getX() != expectedXDim) {
1300 throw new RSRuntimeException("Incorrect vector dimensions for GER");
1301 }
1302 int expectedYDim = 1 + (N - 1) * incY;
1303 if (Y.getType().getX() != expectedYDim) {
1304 throw new RSRuntimeException("Incorrect vector dimensions for GER");
1305 }
1306
1307
1308 }
1309 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1310 validateUplo(Uplo);
1311 if (!A.getType().getElement().isCompatible(e) ||
1312 !X.getType().getElement().isCompatible(e)) {
1313 throw new RSRuntimeException("Called BLAS with wrong Element type");
1314 }
1315
1316 int N = A.getType().getX();
1317
1318 if (X.getType().getY() > 1) {
1319 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1320 }
1321 if (N != A.getType().getY()) {
1322 throw new RSRuntimeException("A must be a symmetric matrix");
1323 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001324 if (incX <= 0) {
1325 throw new RSRuntimeException("Vector increments must be greater than 0");
1326 }
Tim Murray25207df2015-01-12 16:47:56 -08001327 int expectedXDim = 1 + (N - 1) * incX;
1328 if (X.getType().getX() != expectedXDim) {
1329 throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1330 }
1331 return N;
1332 }
1333 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1334 validateUplo(Uplo);
1335 if (!Ap.getType().getElement().isCompatible(e) ||
1336 !X.getType().getElement().isCompatible(e)) {
1337 throw new RSRuntimeException("Called BLAS with wrong Element type");
1338 }
1339 if (X.getType().getY() > 1) {
1340 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1341 }
1342
1343 if (Ap.getType().getY() > 1) {
1344 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1345 }
1346
1347 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1348 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1349 throw new RSRuntimeException("Invalid dimension for Ap");
1350 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001351 if (incX <= 0) {
1352 throw new RSRuntimeException("Vector increments must be greater than 0");
1353 }
Tim Murray25207df2015-01-12 16:47:56 -08001354 int expectedXDim = 1 + (N - 1) * incX;
1355 if (X.getType().getX() != expectedXDim) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001356 throw new RSRuntimeException("Incorrect vector dimensions for SPR");
Tim Murray25207df2015-01-12 16:47:56 -08001357 }
1358
1359 return N;
1360 }
1361
1362 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1363 validateUplo(Uplo);
1364 if (!A.getType().getElement().isCompatible(e) ||
1365 !X.getType().getElement().isCompatible(e) ||
1366 !Y.getType().getElement().isCompatible(e)) {
1367 throw new RSRuntimeException("Called BLAS with wrong Element type");
1368 }
1369
1370 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1371 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1372 }
1373
1374 int N = A.getType().getX();
1375
1376 if (N != A.getType().getY()) {
1377 throw new RSRuntimeException("A must be a symmetric matrix");
1378 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001379 if (incX <= 0 || incY <= 0) {
1380 throw new RSRuntimeException("Vector increments must be greater than 0");
1381 }
Tim Murray25207df2015-01-12 16:47:56 -08001382 int expectedXDim = 1 + (N - 1) * incX;
1383 int expectedYDim = 1 + (N - 1) * incY;
1384 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1385 throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1386 }
1387 return N;
1388
1389 }
1390 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1391 validateUplo(Uplo);
1392 if (!Ap.getType().getElement().isCompatible(e) ||
1393 !X.getType().getElement().isCompatible(e) ||
1394 !Y.getType().getElement().isCompatible(e)) {
1395 throw new RSRuntimeException("Called BLAS with wrong Element type");
1396 }
1397 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1398 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1399 }
1400
1401 if (Ap.getType().getY() > 1) {
1402 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1403 }
1404
1405 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1406 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1407 throw new RSRuntimeException("Invalid dimension for Ap");
1408 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001409 if (incX <= 0 || incY <= 0) {
1410 throw new RSRuntimeException("Vector increments must be greater than 0");
1411 }
Tim Murray25207df2015-01-12 16:47:56 -08001412 int expectedXDim = 1 + (N - 1) * incX;
1413 int expectedYDim = 1 + (N - 1) * incY;
1414 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001415 throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
Tim Murray25207df2015-01-12 16:47:56 -08001416 }
1417
1418 return N;
1419 }
1420
Miao Wangfb675a52015-05-12 18:22:20 -07001421 /**
1422 * SSYMV performs the matrix-vector operation
1423 * y := alpha*A*x + beta*y
1424 *
1425 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1426 *
1427 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1428 * @param alpha The scalar alpha.
1429 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1430 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1431 * @param incX The increment for the elements of vector x, must be larger than zero.
1432 * @param beta The scalar beta.
1433 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1434 * @param incY The increment for the elements of vector y, must be larger than zero.
1435 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001436 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001437 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1438 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1439 }
Miao Wangfb675a52015-05-12 18:22:20 -07001440
1441 /**
1442 * SSBMV performs the matrix-vector operation
1443 * y := alpha*A*x + beta*y
1444 *
1445 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1446 *
1447 * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1448 * but only the region N*(K+1) will be referenced. The following subroutine is an
1449 * example showing how to convert an UPPER triangular matrix 'a' to a row-based band matrix 'b'.
1450 * for i in range(0, n):
1451 * for j in range(i, min(i+k+1, n)):
1452 * b[i, j-i] = a[i, j]
1453 *
1454 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1455 * @param K The number of off-diagonals of the matrix A
1456 * @param alpha The scalar alpha.
1457 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1458 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1459 * @param incX The increment for the elements of vector x, must be larger than zero.
1460 * @param beta The scalar beta.
1461 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1462 * @param incY The increment for the elements of vector y, must be larger than zero.
1463 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001464 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001465 // SBMV is the same as SYMV + K >= 0
1466 if (K < 0) {
1467 throw new RSRuntimeException("K must be greater than or equal to 0");
1468 }
Tim Murray25207df2015-01-12 16:47:56 -08001469 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1470 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1471 }
Miao Wangfb675a52015-05-12 18:22:20 -07001472
1473 /**
1474 * SSPMV performs the matrix-vector operation
1475 * y := alpha*A*x + beta*y
1476 *
1477 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1478 *
1479 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1480 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1481 * 'a' to packed matrix 'b'.
1482 * k = 0
1483 * for i in range(0, n):
1484 * for j in range(i, n):
1485 * b[k++] = a[i, j]
1486 *
1487 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1488 * @param alpha The scalar alpha.
1489 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1490 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1491 * @param incX The increment for the elements of vector x, must be larger than zero.
1492 * @param beta The scalar beta.
1493 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1494 * @param incY The increment for the elements of vector y, must be larger than zero.
1495 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001496 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001497 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1498 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1499 }
Miao Wangfb675a52015-05-12 18:22:20 -07001500
1501 /**
1502 * SGER performs the rank 1 operation
1503 * A := alpha*x*y**T + A
1504 *
1505 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1506 *
1507 * @param alpha The scalar alpha.
1508 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1509 * @param incX The increment for the elements of vector x, must be larger than zero.
1510 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1511 * @param incY The increment for the elements of vector y, must be larger than zero.
1512 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1513 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001514 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001515 int M = A.getType().getY();
1516 int N = A.getType().getX();
Miao Wang68ca43e2015-04-23 15:06:09 -07001517 validateGER(Element.F32(mRS), X, incX, Y, incY, A);
Tim Murray25207df2015-01-12 16:47:56 -08001518 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1519 }
Miao Wangfb675a52015-05-12 18:22:20 -07001520
1521 /**
1522 * SSYR performs the rank 1 operation
1523 * A := alpha*x*x**T + A
1524 *
1525 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1526 *
1527 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1528 * @param alpha The scalar alpha.
1529 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1530 * @param incX The increment for the elements of vector x, must be larger than zero.
1531 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1532 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001533 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001534 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1535 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1536 }
Miao Wangfb675a52015-05-12 18:22:20 -07001537
1538 /**
1539 * SSPR performs the rank 1 operation
1540 * A := alpha*x*x**T + A
1541 *
1542 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1543 *
1544 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1545 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1546 * 'a' to packed matrix 'b'.
1547 * k = 0
1548 * for i in range(0, n):
1549 * for j in range(i, n):
1550 * b[k++] = a[i, j]
1551 *
1552 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1553 * @param alpha The scalar alpha.
1554 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1555 * @param incX The increment for the elements of vector x, must be larger than zero.
1556 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1557 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001558 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001559 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1560 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1561 }
Miao Wangfb675a52015-05-12 18:22:20 -07001562
1563 /**
1564 * SSYR2 performs the symmetric rank 2 operation
1565 * A := alpha*x*y**T + alpha*y*x**T + A
1566 *
1567 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1568 *
1569 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1570 * @param alpha The scalar alpha.
1571 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1572 * @param incX The increment for the elements of vector x, must be larger than zero.
1573 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1574 * @param incY The increment for the elements of vector y, must be larger than zero.
1575 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1576 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001577 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001578 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1579 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1580 }
Miao Wangfb675a52015-05-12 18:22:20 -07001581
1582 /**
1583 * SSPR2 performs the symmetric rank 2 operation
1584 * A := alpha*x*y**T + alpha*y*x**T + A
1585 *
1586 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1587 *
1588 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1589 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1590 * 'a' to packed matrix 'b'.
1591 * k = 0
1592 * for i in range(0, n):
1593 * for j in range(i, n):
1594 * b[k++] = a[i, j]
1595 *
1596 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1597 * @param alpha The scalar alpha.
1598 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1599 * @param incX The increment for the elements of vector x, must be larger than zero.
1600 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1601 * @param incY The increment for the elements of vector y, must be larger than zero.
1602 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1603 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001604 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001605 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1606 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1607 }
Miao Wangfb675a52015-05-12 18:22:20 -07001608
1609 /**
1610 * DSYMV performs the matrix-vector operation
1611 * y := alpha*A*x + beta*y
1612 *
1613 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1614 *
1615 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1616 * @param alpha The scalar alpha.
1617 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1618 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1619 * @param incX The increment for the elements of vector x, must be larger than zero.
1620 * @param beta The scalar beta.
1621 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1622 * @param incY The increment for the elements of vector y, must be larger than zero.
1623 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001624 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001625 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1626 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1627 }
Miao Wangfb675a52015-05-12 18:22:20 -07001628
1629 /**
1630 * DSBMV performs the matrix-vector operation
1631 * y := alpha*A*x + beta*y
1632 *
1633 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1634 *
1635 * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1636 * but only the region N*(K+1) will be referenced. The following subroutine is an
1637 * example showing how to convert an UPPER triangular matrix 'a' to a row-based band matrix 'b'.
1638 * for i in range(0, n):
1639 * for j in range(i, min(i+k+1, n)):
1640 * b[i, j-i] = a[i, j]
1641 *
1642 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1643 * @param K The number of off-diagonals of the matrix A
1644 * @param alpha The scalar alpha.
1645 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1646 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1647 * @param incX The increment for the elements of vector x, must be larger than zero.
1648 * @param beta The scalar beta.
1649 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1650 * @param incY The increment for the elements of vector y, must be larger than zero.
1651 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001652 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001653 // SBMV is the same as SYMV + K >= 0
1654 if (K < 0) {
1655 throw new RSRuntimeException("K must be greater than or equal to 0");
1656 }
Tim Murray25207df2015-01-12 16:47:56 -08001657 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1658 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1659 }
Miao Wangfb675a52015-05-12 18:22:20 -07001660
1661 /**
1662 * DSPMV performs the matrix-vector operation
1663 * y := alpha*A*x + beta*y
1664 *
1665 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
1666 *
1667 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1668 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1669 * 'a' to packed matrix 'b'.
1670 * k = 0
1671 * for i in range(0, n):
1672 * for j in range(i, n):
1673 * b[k++] = a[i, j]
1674 *
1675 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1676 * @param alpha The scalar alpha.
1677 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1678 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1679 * @param incX The increment for the elements of vector x, must be larger than zero.
1680 * @param beta The scalar beta.
1681 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1682 * @param incY The increment for the elements of vector y, must be larger than zero.
1683 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001684 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001685 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
1686 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1687 }
Miao Wangfb675a52015-05-12 18:22:20 -07001688
1689 /**
1690 * DGER performs the rank 1 operation
1691 * A := alpha*x*y**T + A
1692 *
1693 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
1694 *
1695 * @param alpha The scalar alpha.
1696 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1697 * @param incX The increment for the elements of vector x, must be larger than zero.
1698 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1699 * @param incY The increment for the elements of vector y, must be larger than zero.
1700 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1701 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001702 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001703 int M = A.getType().getY();
1704 int N = A.getType().getX();
Miao Wang68ca43e2015-04-23 15:06:09 -07001705 validateGER(Element.F64(mRS), X, incX, Y, incY, A);
Tim Murray25207df2015-01-12 16:47:56 -08001706 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1707 }
Miao Wangfb675a52015-05-12 18:22:20 -07001708
1709 /**
1710 * DSYR performs the rank 1 operation
1711 * A := alpha*x*x**T + A
1712 *
1713 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
1714 *
1715 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1716 * @param alpha The scalar alpha.
1717 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1718 * @param incX The increment for the elements of vector x, must be larger than zero.
1719 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1720 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001721 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001722 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
1723 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1724 }
Miao Wangfb675a52015-05-12 18:22:20 -07001725
1726 /**
1727 * DSPR performs the rank 1 operation
1728 * A := alpha*x*x**T + A
1729 *
1730 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
1731 *
1732 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1733 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1734 * 'a' to packed matrix 'b'.
1735 * k = 0
1736 * for i in range(0, n):
1737 * for j in range(i, n):
1738 * b[k++] = a[i, j]
1739 *
1740 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1741 * @param alpha The scalar alpha.
1742 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1743 * @param incX The increment for the elements of vector x, must be larger than zero.
1744 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1745 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001746 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001747 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
1748 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1749 }
Miao Wangfb675a52015-05-12 18:22:20 -07001750
1751 /**
1752 * DSYR2 performs the symmetric rank 2 operation
1753 * A := alpha*x*y**T + alpha*y*x**T + A
1754 *
1755 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
1756 *
1757 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1758 * @param alpha The scalar alpha.
1759 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1760 * @param incX The increment for the elements of vector x, must be larger than zero.
1761 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1762 * @param incY The increment for the elements of vector y, must be larger than zero.
1763 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1764 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001765 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001766 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
1767 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1768 }
Miao Wangfb675a52015-05-12 18:22:20 -07001769
1770 /**
1771 * DSPR2 performs the symmetric rank 2 operation
1772 * A := alpha*x*y**T + alpha*y*x**T + A
1773 *
1774 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
1775 *
1776 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1777 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1778 * 'a' to packed matrix 'b'.
1779 * k = 0
1780 * for i in range(0, n):
1781 * for j in range(i, n):
1782 * b[k++] = a[i, j]
1783 *
1784 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1785 * @param alpha The scalar alpha.
1786 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1787 * @param incX The increment for the elements of vector x, must be larger than zero.
1788 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1789 * @param incY The increment for the elements of vector y, must be larger than zero.
1790 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1791 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001792 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001793 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
1794 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1795 }
1796
1797
1798 /**
1799 * Level 2, C and Z only
1800 */
1801
1802 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1803 if (!A.getType().getElement().isCompatible(e) ||
1804 !X.getType().getElement().isCompatible(e) ||
1805 !Y.getType().getElement().isCompatible(e)) {
1806 throw new RSRuntimeException("Called BLAS with wrong Element type");
1807 }
1808 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1809 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1810 }
1811
1812 int M = A.getType().getY();
1813 int N = A.getType().getX();
Miao Wang68ca43e2015-04-23 15:06:09 -07001814 if (incX <= 0 || incY <= 0) {
1815 throw new RSRuntimeException("Vector increments must be greater than 0");
1816 }
1817 int expectedXDim = 1 + (M - 1) * incX;
Tim Murray25207df2015-01-12 16:47:56 -08001818 if (X.getType().getX() != expectedXDim) {
1819 throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1820 }
1821 int expectedYDim = 1 + (N - 1) * incY;
1822 if (Y.getType().getX() != expectedYDim) {
1823 throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1824 }
1825
1826 }
1827
Miao Wangfb675a52015-05-12 18:22:20 -07001828 /**
1829 * CHEMV performs the matrix-vector operation
1830 * y := alpha*A*x + beta*y
1831 *
1832 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
1833 *
1834 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1835 * @param alpha The scalar alpha.
1836 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1837 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1838 * @param incX The increment for the elements of vector x, must be larger than zero.
1839 * @param beta The scalar beta.
1840 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1841 * @param incY The increment for the elements of vector y, must be larger than zero.
1842 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001843 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001844 // HEMV is the same as SYR2 validation-wise
1845 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1846 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1847 }
Miao Wangfb675a52015-05-12 18:22:20 -07001848
1849 /**
1850 * CHBMV performs the matrix-vector operation
1851 * y := alpha*A*x + beta*y
1852 *
1853 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
1854 *
1855 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1856 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1857 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1858 * for i in range(0, n):
1859 * for j in range(i, min(i+k+1, n)):
1860 * b[i, j-i] = a[i, j]
1861 *
1862 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1863 * @param K The number of off-diagonals of the matrix A
1864 * @param alpha The scalar alpha.
1865 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1866 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1867 * @param incX The increment for the elements of vector x, must be larger than zero.
1868 * @param beta The scalar beta.
1869 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1870 * @param incY The increment for the elements of vector y, must be larger than zero.
1871 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001872 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001873 // HBMV is the same as SYR2 validation-wise
1874 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1875 if (K < 0) {
1876 throw new RSRuntimeException("K must be 0 or greater for HBMV");
1877 }
1878 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1879 }
Miao Wangfb675a52015-05-12 18:22:20 -07001880
1881 /**
1882 * CHPMV performs the matrix-vector operation
1883 * y := alpha*A*x + beta*y
1884 *
1885 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
1886 *
1887 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1888 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1889 * 'a' to packed matrix 'b'.
1890 * k = 0
1891 * for i in range(0, n):
1892 * for j in range(i, n):
1893 * b[k++] = a[i, j]
1894 *
1895 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1896 * @param alpha The scalar alpha.
1897 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1898 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1899 * @param incX The increment for the elements of vector x, must be larger than zero.
1900 * @param beta The scalar beta.
1901 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1902 * @param incY The increment for the elements of vector y, must be larger than zero.
1903 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001904 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001905 // HPMV is the same as SPR2
1906 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
1907 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1908 }
Miao Wangfb675a52015-05-12 18:22:20 -07001909
1910 /**
1911 * CGERU performs the rank 1 operation
1912 * A := alpha*x*y**T + A
1913 *
1914 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
1915 *
1916 * @param alpha The scalar alpha.
1917 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1918 * @param incX The increment for the elements of vector x, must be larger than zero.
1919 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1920 * @param incY The increment for the elements of vector y, must be larger than zero.
1921 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1922 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001923 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001924 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1925 int M = A.getType().getY();
1926 int N = A.getType().getX();
1927 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1928 }
Miao Wangfb675a52015-05-12 18:22:20 -07001929
1930 /**
1931 * CGERC performs the rank 1 operation
1932 * A := alpha*x*y**H + A
1933 *
1934 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
1935 *
1936 * @param alpha The scalar alpha.
1937 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1938 * @param incX The increment for the elements of vector x, must be larger than zero.
1939 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1940 * @param incY The increment for the elements of vector y, must be larger than zero.
1941 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1942 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001943 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001944 // same as GERU
1945 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1946 int M = A.getType().getY();
1947 int N = A.getType().getX();
1948 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1949 }
Miao Wangfb675a52015-05-12 18:22:20 -07001950
1951 /**
1952 * CHER performs the rank 1 operation
1953 * A := alpha*x*x**H + A
1954 *
1955 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
1956 *
1957 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1958 * @param alpha The scalar alpha.
1959 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1960 * @param incX The increment for the elements of vector x, must be larger than zero.
1961 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1962 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001963 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001964 // same as SYR
Miao Wang68ca43e2015-04-23 15:06:09 -07001965 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
Tim Murray25207df2015-01-12 16:47:56 -08001966 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
1967 }
Miao Wangfb675a52015-05-12 18:22:20 -07001968
1969 /**
1970 * CHPR performs the rank 1 operation
1971 * A := alpha*x*x**H + A
1972 *
1973 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
1974 *
1975 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1976 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1977 * 'a' to packed matrix 'b'.
1978 * k = 0
1979 * for i in range(0, n):
1980 * for j in range(i, n):
1981 * b[k++] = a[i, j]
1982 *
1983 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1984 * @param alpha The scalar alpha.
1985 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1986 * @param incX The increment for the elements of vector x, must be larger than zero.
1987 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1988 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001989 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001990 // equivalent to SPR for validation
1991 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
1992 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
1993 }
Miao Wangfb675a52015-05-12 18:22:20 -07001994
1995 /**
1996 * CHER2 performs the symmetric rank 2 operation
1997 * A := alpha*x*y**H + alpha*y*x**H + A
1998 *
1999 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2000 *
2001 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2002 * @param alpha The scalar alpha.
2003 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2004 * @param incX The increment for the elements of vector x, must be larger than zero.
2005 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2006 * @param incY The increment for the elements of vector y, must be larger than zero.
2007 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2008 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002009 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002010 // same as SYR2
2011 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2012 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2013 }
Miao Wangfb675a52015-05-12 18:22:20 -07002014
2015 /**
2016 * CHPR2 performs the symmetric rank 2 operation
2017 * A := alpha*x*y**H + alpha*y*x**H + A
2018 *
2019 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2020 *
2021 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2022 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2023 * 'a' to packed matrix 'b'.
2024 * k = 0
2025 * for i in range(0, n):
2026 * for j in range(i, n):
2027 * b[k++] = a[i, j]
2028 *
2029 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2030 * @param alpha The scalar alpha.
2031 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2032 * @param incX The increment for the elements of vector x, must be larger than zero.
2033 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2034 * @param incY The increment for the elements of vector y, must be larger than zero.
2035 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2036 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002037 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08002038 // same as SPR2
2039 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2040 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2041 }
Miao Wangfb675a52015-05-12 18:22:20 -07002042
2043 /**
2044 * ZHEMV performs the matrix-vector operation
2045 * y := alpha*A*x + beta*y
2046 *
2047 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2048 *
2049 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2050 * @param alpha The scalar alpha.
2051 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2052 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2053 * @param incX The increment for the elements of vector x, must be larger than zero.
2054 * @param beta The scalar beta.
2055 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2056 * @param incY The increment for the elements of vector y, must be larger than zero.
2057 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002058 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08002059 // HEMV is the same as SYR2 validation-wise
2060 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2061 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2062 }
Miao Wangfb675a52015-05-12 18:22:20 -07002063
2064 /**
2065 * ZHBMV performs the matrix-vector operation
2066 * y := alpha*A*x + beta*y
2067 *
2068 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2069 *
2070 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2071 * but only the region N*(K+1) will be referenced. The following subroutine can is an
2072 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2073 * for i in range(0, n):
2074 * for j in range(i, min(i+k+1, n)):
2075 * b[i, j-i] = a[i, j]
2076 *
2077 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2078 * @param K The number of off-diagonals of the matrix A
2079 * @param alpha The scalar alpha.
2080 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2081 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2082 * @param incX The increment for the elements of vector x, must be larger than zero.
2083 * @param beta The scalar beta.
2084 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2085 * @param incY The increment for the elements of vector y, must be larger than zero.
2086 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002087 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08002088 // HBMV is the same as SYR2 validation-wise
2089 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2090 if (K < 0) {
2091 throw new RSRuntimeException("K must be 0 or greater for HBMV");
2092 }
2093 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2094 }
Miao Wangfb675a52015-05-12 18:22:20 -07002095
2096 /**
2097 * ZHPMV performs the matrix-vector operation
2098 * y := alpha*A*x + beta*y
2099 *
2100 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2101 *
2102 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2103 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2104 * 'a' to packed matrix 'b'.
2105 * k = 0
2106 * for i in range(0, n):
2107 * for j in range(i, n):
2108 * b[k++] = a[i, j]
2109 *
2110 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2111 * @param alpha The scalar alpha.
2112 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2113 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2114 * @param incX The increment for the elements of vector x, must be larger than zero.
2115 * @param beta The scalar beta.
2116 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2117 * @param incY The increment for the elements of vector y, must be larger than zero.
2118 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002119 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08002120 // HPMV is the same as SPR2
2121 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2122 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2123 }
Miao Wangfb675a52015-05-12 18:22:20 -07002124
2125 /**
2126 * ZGERU performs the rank 1 operation
2127 * A := alpha*x*y**T + A
2128 *
2129 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2130 *
2131 * @param alpha The scalar alpha.
2132 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2133 * @param incX The increment for the elements of vector x, must be larger than zero.
2134 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2135 * @param incY The increment for the elements of vector y, must be larger than zero.
2136 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2137 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002138 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002139 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2140 int M = A.getType().getY();
2141 int N = A.getType().getX();
2142 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2143 }
Miao Wangfb675a52015-05-12 18:22:20 -07002144
2145 /**
2146 * ZGERC performs the rank 1 operation
2147 * A := alpha*x*y**H + A
2148 *
2149 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2150 *
2151 * @param alpha The scalar alpha.
2152 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2153 * @param incX The increment for the elements of vector x, must be larger than zero.
2154 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2155 * @param incY The increment for the elements of vector y, must be larger than zero.
2156 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2157 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002158 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002159 // same as GERU
2160 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2161 int M = A.getType().getY();
2162 int N = A.getType().getX();
2163 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2164 }
Miao Wangfb675a52015-05-12 18:22:20 -07002165
2166 /**
2167 * ZHER performs the rank 1 operation
2168 * A := alpha*x*x**H + A
2169 *
2170 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2171 *
2172 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2173 * @param alpha The scalar alpha.
2174 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2175 * @param incX The increment for the elements of vector x, must be larger than zero.
2176 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2177 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002178 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002179 // same as SYR
Miao Wangcecc00a2015-04-29 18:14:55 -07002180 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
Tim Murray25207df2015-01-12 16:47:56 -08002181 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
2182 }
Miao Wangfb675a52015-05-12 18:22:20 -07002183
2184 /**
2185 * ZHPR performs the rank 1 operation
2186 * A := alpha*x*x**H + A
2187 *
2188 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2189 *
2190 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2191 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2192 * 'a' to packed matrix 'b'.
2193 * k = 0
2194 * for i in range(0, n):
2195 * for j in range(i, n):
2196 * b[k++] = a[i, j]
2197 *
2198 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2199 * @param alpha The scalar alpha.
2200 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2201 * @param incX The increment for the elements of vector x, must be larger than zero.
2202 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2203 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002204 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08002205 // equivalent to SPR for validation
2206 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2207 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
2208 }
Miao Wangfb675a52015-05-12 18:22:20 -07002209
2210 /**
2211 * ZHER2 performs the symmetric rank 2 operation
2212 * A := alpha*x*y**H + alpha*y*x**H + A
2213 *
2214 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2215 *
2216 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2217 * @param alpha The scalar alpha.
2218 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2219 * @param incX The increment for the elements of vector x, must be larger than zero.
2220 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2221 * @param incY The increment for the elements of vector y, must be larger than zero.
2222 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2223 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002224 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002225 // same as SYR2
2226 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2227 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2228 }
Miao Wangfb675a52015-05-12 18:22:20 -07002229
2230 /**
2231 * ZHPR2 performs the symmetric rank 2 operation
2232 * A := alpha*x*y**H + alpha*y*x**H + A
2233 *
2234 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2235 *
2236 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2237 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2238 * 'a' to packed matrix 'b'.
2239 * k = 0
2240 * for i in range(0, n):
2241 * for j in range(i, n):
2242 * b[k++] = a[i, j]
2243 *
2244 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2245 * @param alpha The scalar alpha.
2246 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2247 * @param incX The increment for the elements of vector x, must be larger than zero.
2248 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2249 * @param incY The increment for the elements of vector y, must be larger than zero.
2250 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2251 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002252 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08002253 // same as SPR2
2254 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2255 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2256 }
2257
2258
2259 /**
2260 * Level 3 BLAS
2261 */
2262
2263 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002264 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
Tim Murray25207df2015-01-12 16:47:56 -08002265 if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2266 (B != null && !B.getType().getElement().isCompatible(e)) ||
2267 (C != null && !C.getType().getElement().isCompatible(e))) {
2268 throw new RSRuntimeException("Called BLAS with wrong Element type");
2269 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002270 if (C == null) {
2271 //since matrix C is used to store the result, it cannot be null.
2272 throw new RSRuntimeException("Allocation C cannot be null");
Tim Murray25207df2015-01-12 16:47:56 -08002273 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002274 cM = C.getType().getY();
2275 cN = C.getType().getX();
2276
Tim Murray25207df2015-01-12 16:47:56 -08002277 if (Side == RIGHT) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002278 if ((A == null && B != null) || (A != null && B == null)) {
2279 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2280 }
Tim Murray25207df2015-01-12 16:47:56 -08002281 if (B != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002282 bM = A.getType().getY();
2283 bN = A.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002284 }
2285 if (A != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002286 aM = B.getType().getY();
2287 aN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002288 }
2289 } else {
2290 if (A != null) {
Miao Wang1e940d82015-04-30 10:47:42 -07002291 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002292 aN = A.getType().getY();
2293 aM = A.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002294 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07002295 aM = A.getType().getY();
2296 aN = A.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002297 }
2298 }
2299 if (B != null) {
Miao Wang1e940d82015-04-30 10:47:42 -07002300 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002301 bN = B.getType().getY();
2302 bM = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002303 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07002304 bM = B.getType().getY();
2305 bN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002306 }
2307 }
2308 }
2309 if (A != null && B != null && C != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002310 if (aN != bM || aM != cM || bN != cN) {
Tim Murray25207df2015-01-12 16:47:56 -08002311 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2312 }
2313 } else if (A != null && C != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002314 // A and C only, for SYRK
2315 if (cM != cN) {
2316 throw new RSRuntimeException("Matrix C is not symmetric");
2317 }
Miao Wang50a8ff12015-05-01 15:32:24 -07002318 if (aM != cM) {
2319 throw new RSRuntimeException("Called BLAS with invalid dimensions");
Tim Murray25207df2015-01-12 16:47:56 -08002320 }
2321 } else if (A != null && B != null) {
2322 // A and B only
Miao Wang37ae07c2015-04-24 11:19:53 -07002323 if (aN != bM) {
2324 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2325 }
Tim Murray25207df2015-01-12 16:47:56 -08002326 }
2327
2328 }
2329
Miao Wangfb675a52015-05-12 18:22:20 -07002330 /**
2331 * SGEMM performs one of the matrix-matrix operations
2332 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T
2333 *
2334 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2335 *
2336 * @param TransA The type of transpose applied to matrix A.
2337 * @param TransB The type of transpose applied to matrix B.
2338 * @param alpha The scalar alpha.
2339 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2340 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2341 * @param beta The scalar beta.
2342 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2343 */
Tim Murray25207df2015-01-12 16:47:56 -08002344 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2345 Allocation B, float beta, Allocation C) {
2346 validateTranspose(TransA);
2347 validateTranspose(TransB);
2348 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2349
2350 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002351 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002352 M = A.getType().getX();
2353 K = A.getType().getY();
2354 } else {
2355 M = A.getType().getY();
2356 K = A.getType().getX();
2357 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002358 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002359 N = B.getType().getY();
2360 } else {
2361 N = B.getType().getX();
2362 }
2363 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),
2364 beta, C.getID(mRS), 0, 0, 0, 0);
2365 }
Miao Wangfb675a52015-05-12 18:22:20 -07002366
2367 /**
2368 * DGEMM performs one of the matrix-matrix operations
2369 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T
2370 *
2371 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2372 *
2373 * @param TransA The type of transpose applied to matrix A.
2374 * @param TransB The type of transpose applied to matrix B.
2375 * @param alpha The scalar alpha.
2376 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2377 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2378 * @param beta The scalar beta.
2379 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2380 */
Tim Murray25207df2015-01-12 16:47:56 -08002381 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2382 Allocation B, double beta, Allocation C) {
2383 validateTranspose(TransA);
2384 validateTranspose(TransB);
2385 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2386 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002387 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002388 M = A.getType().getX();
2389 K = A.getType().getY();
2390 } else {
2391 M = A.getType().getY();
2392 K = A.getType().getX();
2393 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002394 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002395 N = B.getType().getY();
2396 } else {
2397 N = B.getType().getX();
2398 }
2399 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),
2400 beta, C.getID(mRS), 0, 0, 0, 0);
2401 }
Miao Wangfb675a52015-05-12 18:22:20 -07002402
2403 /**
2404 * CGEMM performs one of the matrix-matrix operations
2405 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H
2406 *
2407 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
2408 *
2409 * @param TransA The type of transpose applied to matrix A.
2410 * @param TransB The type of transpose applied to matrix B.
2411 * @param alpha The scalar alpha.
2412 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2413 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2414 * @param beta The scalar beta.
2415 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2416 */
Tim Murray25207df2015-01-12 16:47:56 -08002417 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
2418 Allocation B, Float2 beta, Allocation C) {
2419 validateTranspose(TransA);
2420 validateTranspose(TransB);
2421 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
2422 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002423 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002424 M = A.getType().getX();
2425 K = A.getType().getY();
2426 } else {
2427 M = A.getType().getY();
2428 K = A.getType().getX();
2429 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002430 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002431 N = B.getType().getY();
2432 } else {
2433 N = B.getType().getX();
2434 }
2435 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2436 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2437 }
2438
Miao Wangfb675a52015-05-12 18:22:20 -07002439 /**
2440 * ZGEMM performs one of the matrix-matrix operations
2441 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H
2442 *
2443 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
2444 *
2445 * @param TransA The type of transpose applied to matrix A.
2446 * @param TransB The type of transpose applied to matrix B.
2447 * @param alpha The scalar alpha.
Elliot Waite54de77472017-01-11 15:30:35 -08002448 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2449 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
Miao Wangfb675a52015-05-12 18:22:20 -07002450 * @param beta The scalar beta.
Elliot Waite54de77472017-01-11 15:30:35 -08002451 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
Miao Wangfb675a52015-05-12 18:22:20 -07002452 */
Tim Murray25207df2015-01-12 16:47:56 -08002453 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
2454 Allocation B, Double2 beta, Allocation C) {
2455 validateTranspose(TransA);
2456 validateTranspose(TransB);
2457 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
2458 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002459 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002460 M = A.getType().getX();
2461 K = A.getType().getY();
2462 } else {
2463 M = A.getType().getY();
2464 K = A.getType().getX();
2465 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002466 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002467 N = B.getType().getY();
2468 } else {
2469 N = B.getType().getX();
2470 }
2471 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2472 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2473 }
2474
Miao Wangfb675a52015-05-12 18:22:20 -07002475 /**
2476 * SSYMM performs one of the matrix-matrix operations
2477 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2478 *
2479 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
2480 *
2481 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2482 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2483 * @param alpha The scalar alpha.
2484 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2485 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2486 * @param beta The scalar beta.
2487 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2488 */
Tim Murray25207df2015-01-12 16:47:56 -08002489 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
2490 Allocation B, float beta, Allocation C) {
2491 validateSide(Side);
2492 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002493 //For SYMM, Matrix A should be symmetric
2494 if (A.getType().getX() != A.getType().getY()) {
2495 throw new RSRuntimeException("Matrix A is not symmetric");
2496 }
Tim Murray25207df2015-01-12 16:47:56 -08002497 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
2498 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2499 beta, C.getID(mRS), 0, 0, 0, 0);
2500 }
Miao Wangfb675a52015-05-12 18:22:20 -07002501
2502 /**
2503 * DSYMM performs one of the matrix-matrix operations
2504 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2505 *
2506 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
2507 *
2508 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2509 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2510 * @param alpha The scalar alpha.
2511 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2512 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2513 * @param beta The scalar beta.
2514 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2515 */
Tim Murray25207df2015-01-12 16:47:56 -08002516 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
2517 Allocation B, double beta, Allocation C) {
2518 validateSide(Side);
2519 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002520 if (A.getType().getX() != A.getType().getY()) {
2521 throw new RSRuntimeException("Matrix A is not symmetric");
2522 }
Tim Murray25207df2015-01-12 16:47:56 -08002523 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
2524 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2525 beta, C.getID(mRS), 0, 0, 0, 0);
2526 }
Miao Wangfb675a52015-05-12 18:22:20 -07002527
2528 /**
2529 * CSYMM performs one of the matrix-matrix operations
2530 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2531 *
2532 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
2533 *
2534 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2535 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2536 * @param alpha The scalar alpha.
2537 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2538 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2539 * @param beta The scalar beta.
2540 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2541 */
Tim Murray25207df2015-01-12 16:47:56 -08002542 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
2543 Allocation B, Float2 beta, Allocation C) {
2544 validateSide(Side);
2545 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002546 if (A.getType().getX() != A.getType().getY()) {
2547 throw new RSRuntimeException("Matrix A is not symmetric");
2548 }
Tim Murray25207df2015-01-12 16:47:56 -08002549 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
2550 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2551 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2552 }
Miao Wangfb675a52015-05-12 18:22:20 -07002553
2554 /**
2555 * ZSYMM performs one of the matrix-matrix operations
2556 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2557 *
2558 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
2559 *
2560 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2561 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2562 * @param alpha The scalar alpha.
2563 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2564 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2565 * @param beta The scalar beta.
2566 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2567 */
Tim Murray25207df2015-01-12 16:47:56 -08002568 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
2569 Allocation B, Double2 beta, Allocation C) {
2570 validateSide(Side);
2571 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002572 if (A.getType().getX() != A.getType().getY()) {
2573 throw new RSRuntimeException("Matrix A is not symmetric");
2574 }
Tim Murray25207df2015-01-12 16:47:56 -08002575 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
2576 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2577 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2578 }
2579
Miao Wangfb675a52015-05-12 18:22:20 -07002580 /**
2581 * SSYRK performs one of the symmetric rank k operations
2582 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2583 *
2584 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
2585 *
2586 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2587 * @param Trans The type of transpose applied to the operation.
2588 * @param alpha The scalar alpha.
2589 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2590 * @param beta The scalar beta.
2591 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2592 */
Tim Murray25207df2015-01-12 16:47:56 -08002593 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
2594 validateTranspose(Trans);
2595 validateUplo(Uplo);
2596 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
2597 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002598 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002599 K = A.getType().getY();
2600 } else {
2601 K = A.getType().getX();
2602 }
2603
2604 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2605 }
2606
Miao Wangfb675a52015-05-12 18:22:20 -07002607 /**
2608 * DSYRK performs one of the symmetric rank k operations
2609 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2610 *
2611 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
2612 *
2613 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2614 * @param Trans The type of transpose applied to the operation.
2615 * @param alpha The scalar alpha.
2616 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2617 * @param beta The scalar beta.
2618 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2619 */
Tim Murray25207df2015-01-12 16:47:56 -08002620 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
2621 validateTranspose(Trans);
2622 validateUplo(Uplo);
2623 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
2624 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002625 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002626 K = A.getType().getY();
2627 } else {
2628 K = A.getType().getX();
2629 }
2630 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2631 }
Miao Wangfb675a52015-05-12 18:22:20 -07002632
2633 /**
2634 * CSYRK performs one of the symmetric rank k operations
2635 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2636 *
2637 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
2638 *
2639 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2640 * @param Trans The type of transpose applied to the operation.
2641 * @param alpha The scalar alpha.
2642 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2643 * @param beta The scalar beta.
2644 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2645 */
Miao Wang4c472742015-04-22 15:57:57 -07002646 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08002647 validateTranspose(Trans);
2648 validateUplo(Uplo);
2649 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
2650 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002651 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002652 K = A.getType().getY();
2653 } else {
2654 K = A.getType().getX();
2655 }
Miao Wang4c472742015-04-22 15:57:57 -07002656 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
Tim Murray25207df2015-01-12 16:47:56 -08002657 C.getID(mRS), 0, 0, 0, 0);
2658 }
Miao Wangfb675a52015-05-12 18:22:20 -07002659
2660 /**
2661 * ZSYRK performs one of the symmetric rank k operations
2662 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2663 *
2664 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
2665 *
2666 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2667 * @param Trans The type of transpose applied to the operation.
2668 * @param alpha The scalar alpha.
2669 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2670 * @param beta The scalar beta.
2671 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2672 */
Miao Wang4c472742015-04-22 15:57:57 -07002673 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08002674 validateTranspose(Trans);
2675 validateUplo(Uplo);
2676 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
2677 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002678 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002679 K = A.getType().getY();
2680 } else {
2681 K = A.getType().getX();
2682 }
Miao Wang4c472742015-04-22 15:57:57 -07002683 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
Tim Murray25207df2015-01-12 16:47:56 -08002684 C.getID(mRS), 0, 0, 0, 0);
2685 }
2686
2687 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
2688 validateTranspose(Trans);
2689 if (!A.getType().getElement().isCompatible(e) ||
2690 !B.getType().getElement().isCompatible(e) ||
2691 !C.getType().getElement().isCompatible(e)) {
2692 throw new RSRuntimeException("Called BLAS with wrong Element type");
2693 }
2694 int Cdim = -1;
2695 // A is n x k if no transpose, k x n if transpose
2696 // C is n x n
2697 if (Trans == TRANSPOSE) {
2698 // check columns versus C
2699 Cdim = A.getType().getX();
2700 } else {
2701 // check rows versus C
2702 Cdim = A.getType().getY();
2703 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002704 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
Tim Murray25207df2015-01-12 16:47:56 -08002705 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
2706 }
2707 // A dims == B dims
2708 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
2709 throw new RSRuntimeException("Invalid A and B in SYR2K");
2710 }
2711 }
Miao Wangfb675a52015-05-12 18:22:20 -07002712
2713 /**
2714 * SSYR2K performs one of the symmetric rank 2k operations
2715 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2716 *
2717 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
2718 *
2719 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2720 * @param Trans The type of transpose applied to the operation.
2721 * @param alpha The scalar alpha.
2722 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2723 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2724 * @param beta The scalar beta.
2725 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2726 */
Tim Murray25207df2015-01-12 16:47:56 -08002727 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
2728 validateUplo(Uplo);
2729 validateSYR2K(Element.F32(mRS), Trans, A, B, C);
2730 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002731 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002732 K = A.getType().getY();
2733 } else {
2734 K = A.getType().getX();
2735 }
2736 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
2737 }
Miao Wangfb675a52015-05-12 18:22:20 -07002738
2739 /**
2740 * DSYR2K performs one of the symmetric rank 2k operations
2741 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2742 *
2743 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
2744 *
2745 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2746 * @param Trans The type of transpose applied to the operation.
2747 * @param alpha The scalar alpha.
2748 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2749 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2750 * @param beta The scalar beta.
2751 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2752 */
Tim Murray25207df2015-01-12 16:47:56 -08002753 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
2754 validateUplo(Uplo);
2755 validateSYR2K(Element.F64(mRS), Trans, A, B, C);
2756 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002757 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002758 K = A.getType().getY();
2759 } else {
2760 K = A.getType().getX();
2761 }
Miao Wang194679ed2015-04-30 17:14:28 -07002762 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002763 }
Miao Wangfb675a52015-05-12 18:22:20 -07002764
2765 /**
2766 * CSYR2K performs one of the symmetric rank 2k operations
2767 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2768 *
2769 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
2770 *
2771 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2772 * @param Trans The type of transpose applied to the operation.
2773 * @param alpha The scalar alpha.
2774 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2775 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2776 * @param beta The scalar beta.
2777 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2778 */
Tim Murray25207df2015-01-12 16:47:56 -08002779 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
2780 validateUplo(Uplo);
2781 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
2782 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002783 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002784 K = A.getType().getY();
2785 } else {
2786 K = A.getType().getX();
2787 }
Miao Wang194679ed2015-04-30 17:14:28 -07002788 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002789 }
Miao Wangfb675a52015-05-12 18:22:20 -07002790
2791 /**
2792 * ZSYR2K performs one of the symmetric rank 2k operations
2793 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2794 *
2795 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
2796 *
2797 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2798 * @param Trans The type of transpose applied to the operation.
2799 * @param alpha The scalar alpha.
2800 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2801 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2802 * @param beta The scalar beta.
2803 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2804 */
Tim Murray25207df2015-01-12 16:47:56 -08002805 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
2806 validateUplo(Uplo);
2807 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
2808 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002809 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002810 K = A.getType().getY();
2811 } else {
2812 K = A.getType().getX();
2813 }
Miao Wang194679ed2015-04-30 17:14:28 -07002814 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002815 }
2816
2817 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
2818 validateSide(Side);
2819 validateTranspose(TransA);
Miao Wang37ae07c2015-04-24 11:19:53 -07002820 int aM = -1, aN = -1, bM = -1, bN = -1;
Tim Murray25207df2015-01-12 16:47:56 -08002821 if (!A.getType().getElement().isCompatible(e) ||
2822 !B.getType().getElement().isCompatible(e)) {
2823 throw new RSRuntimeException("Called BLAS with wrong Element type");
2824 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002825
2826 aM = A.getType().getY();
2827 aN = A.getType().getX();
2828 if (aM != aN) {
2829 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
Tim Murray25207df2015-01-12 16:47:56 -08002830 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002831
2832 bM = B.getType().getY();
2833 bN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002834 if (Side == LEFT) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002835 if (aN != bM) {
Tim Murray25207df2015-01-12 16:47:56 -08002836 throw new RSRuntimeException("Called TRMM with invalid matrices");
2837 }
2838 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07002839 if (bN != aM) {
Tim Murray25207df2015-01-12 16:47:56 -08002840 throw new RSRuntimeException("Called TRMM with invalid matrices");
2841 }
2842 }
2843 }
Miao Wangfb675a52015-05-12 18:22:20 -07002844
2845 /**
2846 * STRMM performs one of the matrix-matrix operations
2847 * B := alpha*op(A)*B or B := alpha*B*op(A)
2848 * op(A) is one of op(A) = A or op(A) = A**T
2849 *
2850 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
2851 *
2852 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2853 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2854 * @param TransA The type of transpose applied to matrix A.
2855 * @param Diag Specifies whether or not A is unit triangular.
2856 * @param alpha The scalar alpha.
2857 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2858 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2859 */
Tim Murray25207df2015-01-12 16:47:56 -08002860 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2861 validateUplo(Uplo);
2862 validateDiag(Diag);
2863 validateTRMM(Element.F32(mRS), Side, TransA, A, B);
2864 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2865 alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);
2866 }
Miao Wangfb675a52015-05-12 18:22:20 -07002867
2868 /**
2869 * DTRMM performs one of the matrix-matrix operations
2870 * B := alpha*op(A)*B or B := alpha*B*op(A)
2871 * op(A) is one of op(A) = A or op(A) = A**T
2872 *
2873 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
2874 *
2875 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2876 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2877 * @param TransA The type of transpose applied to matrix A.
2878 * @param Diag Specifies whether or not A is unit triangular.
2879 * @param alpha The scalar alpha.
2880 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2881 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2882 */
Tim Murray25207df2015-01-12 16:47:56 -08002883 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
2884 validateUplo(Uplo);
2885 validateDiag(Diag);
2886 validateTRMM(Element.F64(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07002887 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2888 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002889 }
Miao Wangfb675a52015-05-12 18:22:20 -07002890
2891 /**
2892 * CTRMM performs one of the matrix-matrix operations
2893 * B := alpha*op(A)*B or B := alpha*B*op(A)
2894 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
2895 *
2896 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
2897 *
2898 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2899 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2900 * @param TransA The type of transpose applied to matrix A.
2901 * @param Diag Specifies whether or not A is unit triangular.
2902 * @param alpha The scalar alpha.
2903 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2904 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2905 */
Tim Murray25207df2015-01-12 16:47:56 -08002906 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
2907 validateUplo(Uplo);
2908 validateDiag(Diag);
2909 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07002910 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08002911 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2912 }
Miao Wangfb675a52015-05-12 18:22:20 -07002913
2914 /**
2915 * ZTRMM performs one of the matrix-matrix operations
2916 * B := alpha*op(A)*B or B := alpha*B*op(A)
2917 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
2918 *
2919 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
2920 *
2921 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2922 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2923 * @param TransA The type of transpose applied to matrix A.
2924 * @param Diag Specifies whether or not A is unit triangular.
2925 * @param alpha The scalar alpha.
2926 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2927 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2928 */
Tim Murray25207df2015-01-12 16:47:56 -08002929 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
2930 validateUplo(Uplo);
2931 validateDiag(Diag);
2932 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07002933 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08002934 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2935 }
2936
2937 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002938 int adim = -1, bM = -1, bN = -1;
Tim Murray25207df2015-01-12 16:47:56 -08002939 validateSide(Side);
2940 validateTranspose(TransA);
2941 if (!A.getType().getElement().isCompatible(e) ||
2942 !B.getType().getElement().isCompatible(e)) {
2943 throw new RSRuntimeException("Called BLAS with wrong Element type");
2944 }
2945 adim = A.getType().getX();
2946 if (adim != A.getType().getY()) {
2947 // this may be unnecessary, the restriction could potentially be relaxed
2948 // A needs to contain at least that symmetric matrix but could theoretically be larger
2949 // for now we assume adapters are sufficient, will reevaluate in the future
2950 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
2951 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002952 bM = B.getType().getY();
2953 bN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002954 if (Side == LEFT) {
2955 // A is M*M
Miao Wang37ae07c2015-04-24 11:19:53 -07002956 if (adim != bM) {
Tim Murray25207df2015-01-12 16:47:56 -08002957 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2958 }
2959 } else {
2960 // A is N*N
Miao Wang37ae07c2015-04-24 11:19:53 -07002961 if (adim != bN) {
Tim Murray25207df2015-01-12 16:47:56 -08002962 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2963 }
2964 }
2965 }
Miao Wangfb675a52015-05-12 18:22:20 -07002966
2967 /**
2968 * STRSM solves one of the matrix equations
2969 * op(A)*X := alpha*B or X*op(A) := alpha*B
2970 * op(A) is one of op(A) = A or op(A) = A**T
2971 *
2972 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
2973 *
2974 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2975 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2976 * @param TransA The type of transpose applied to matrix A.
2977 * @param Diag Specifies whether or not A is unit triangular.
2978 * @param alpha The scalar alpha.
2979 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2980 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2981 */
Tim Murray25207df2015-01-12 16:47:56 -08002982 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2983 validateUplo(Uplo);
2984 validateDiag(Diag);
2985 validateTRSM(Element.F32(mRS), Side, TransA, A, B);
2986 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2987 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
2988 }
Miao Wangfb675a52015-05-12 18:22:20 -07002989
2990 /**
2991 * DTRSM solves one of the matrix equations
2992 * op(A)*X := alpha*B or X*op(A) := alpha*B
2993 * op(A) is one of op(A) = A or op(A) = A**T
2994 *
2995 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
2996 *
2997 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2998 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2999 * @param TransA The type of transpose applied to matrix A.
3000 * @param Diag Specifies whether or not A is unit triangular.
3001 * @param alpha The scalar alpha.
3002 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3003 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3004 */
Tim Murray25207df2015-01-12 16:47:56 -08003005 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3006 validateUplo(Uplo);
3007 validateDiag(Diag);
3008 validateTRSM(Element.F64(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07003009 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08003010 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
3011 }
Miao Wangfb675a52015-05-12 18:22:20 -07003012
3013 /**
3014 * CTRSM solves one of the matrix equations
3015 * op(A)*X := alpha*B or X*op(A) := alpha*B
3016 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
3017 *
3018 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3019 *
3020 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3021 * @param Uplo Specifies whether matrix A is upper or lower triangular.
3022 * @param TransA The type of transpose applied to matrix A.
3023 * @param Diag Specifies whether or not A is unit triangular.
3024 * @param alpha The scalar alpha.
3025 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3026 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3027 */
Tim Murray25207df2015-01-12 16:47:56 -08003028 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3029 validateUplo(Uplo);
3030 validateDiag(Diag);
3031 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07003032 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08003033 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3034 }
Miao Wangfb675a52015-05-12 18:22:20 -07003035
3036 /**
3037 * ZTRSM solves one of the matrix equations
3038 * op(A)*X := alpha*B or X*op(A) := alpha*B
3039 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
3040 *
3041 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3042 *
3043 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3044 * @param Uplo Specifies whether matrix A is upper or lower triangular.
3045 * @param TransA The type of transpose applied to matrix A.
3046 * @param Diag Specifies whether or not A is unit triangular.
3047 * @param alpha The scalar alpha.
3048 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3049 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3050 */
Tim Murray25207df2015-01-12 16:47:56 -08003051 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3052 validateUplo(Uplo);
3053 validateDiag(Diag);
3054 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07003055 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08003056 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3057 }
3058
3059 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3060 validateSide(Side);
3061
3062 if (!A.getType().getElement().isCompatible(e) ||
3063 !B.getType().getElement().isCompatible(e) ||
3064 !C.getType().getElement().isCompatible(e)) {
3065 throw new RSRuntimeException("Called BLAS with wrong Element type");
3066 }
3067
3068 // A must be square; can potentially be relaxed similar to TRSM
3069 int adim = A.getType().getX();
3070 if (adim != A.getType().getY()) {
3071 throw new RSRuntimeException("Called HEMM with non-square A");
3072 }
3073 if ((Side == LEFT && adim != B.getType().getY()) ||
3074 (Side == RIGHT && adim != B.getType().getX())) {
3075 throw new RSRuntimeException("Called HEMM with invalid B");
3076 }
3077 if (B.getType().getX() != C.getType().getX() ||
3078 B.getType().getY() != C.getType().getY()) {
3079 throw new RSRuntimeException("Called HEMM with mismatched B and C");
3080 }
3081 }
Miao Wangfb675a52015-05-12 18:22:20 -07003082
3083 /**
3084 * CHEMM performs one of the matrix-matrix operations
3085 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
3086 *
3087 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3088 *
3089 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3090 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3091 * @param alpha The scalar alpha.
3092 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3093 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3094 * @param beta The scalar beta.
3095 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3096 */
Miao Wang4c472742015-04-22 15:57:57 -07003097 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08003098 validateUplo(Uplo);
3099 validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3100 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
Miao Wang4c472742015-04-22 15:57:57 -07003101 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08003102 }
Miao Wangfb675a52015-05-12 18:22:20 -07003103
3104 /**
3105 * ZHEMM performs one of the matrix-matrix operations
3106 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
3107 *
3108 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3109 *
3110 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3111 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3112 * @param alpha The scalar alpha.
3113 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3114 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3115 * @param beta The scalar beta.
3116 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3117 */
Miao Wang4c472742015-04-22 15:57:57 -07003118 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08003119 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07003120 validateHEMM(Element.F64_2(mRS), Side, A, B, C);
Tim Murray25207df2015-01-12 16:47:56 -08003121 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
Miao Wang4c472742015-04-22 15:57:57 -07003122 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08003123 }
3124
3125 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3126 if (!A.getType().getElement().isCompatible(e) ||
3127 !C.getType().getElement().isCompatible(e)) {
3128 throw new RSRuntimeException("Called BLAS with wrong Element type");
3129 }
3130 validateConjTranspose(Trans);
3131 int cdim = C.getType().getX();
3132 if (cdim != C.getType().getY()) {
3133 throw new RSRuntimeException("Called HERK with non-square C");
3134 }
3135 if (Trans == NO_TRANSPOSE) {
Miao Wang37ae07c2015-04-24 11:19:53 -07003136 if (cdim != A.getType().getY()) {
Tim Murray25207df2015-01-12 16:47:56 -08003137 throw new RSRuntimeException("Called HERK with invalid A");
3138 }
3139 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07003140 if (cdim != A.getType().getX()) {
Tim Murray25207df2015-01-12 16:47:56 -08003141 throw new RSRuntimeException("Called HERK with invalid A");
3142 }
3143 }
3144 }
Miao Wangfb675a52015-05-12 18:22:20 -07003145
3146 /**
3147 * CHERK performs one of the hermitian rank k operations
3148 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C
3149 *
3150 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3151 *
3152 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3153 * @param Trans The type of transpose applied to the operation.
3154 * @param alpha The scalar alpha.
3155 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3156 * @param beta The scalar beta.
3157 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3158 */
Tim Murray25207df2015-01-12 16:47:56 -08003159 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3160 validateUplo(Uplo);
3161 validateHERK(Element.F32_2(mRS), Trans, A, C);
3162 int k = 0;
Miao Wang37ae07c2015-04-24 11:19:53 -07003163 if (Trans == CONJ_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08003164 k = A.getType().getY();
3165 } else {
3166 k = A.getType().getX();
3167 }
3168 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3169 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3170 }
Miao Wangfb675a52015-05-12 18:22:20 -07003171
3172 /**
3173 * ZHERK performs one of the hermitian rank k operations
3174 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C
3175 *
3176 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3177 *
3178 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3179 * @param Trans The type of transpose applied to the operation.
3180 * @param alpha The scalar alpha.
3181 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3182 * @param beta The scalar beta.
3183 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3184 */
Tim Murray25207df2015-01-12 16:47:56 -08003185 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3186 validateUplo(Uplo);
3187 validateHERK(Element.F64_2(mRS), Trans, A, C);
3188 int k = 0;
Miao Wang37ae07c2015-04-24 11:19:53 -07003189 if (Trans == CONJ_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08003190 k = A.getType().getY();
3191 } else {
3192 k = A.getType().getX();
3193 }
3194 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3195 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3196 }
3197
3198 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3199 if (!A.getType().getElement().isCompatible(e) ||
3200 !B.getType().getElement().isCompatible(e) ||
3201 !C.getType().getElement().isCompatible(e)) {
3202 throw new RSRuntimeException("Called BLAS with wrong Element type");
3203 }
3204 validateConjTranspose(Trans);
3205 int cdim = C.getType().getX();
3206 if (cdim != C.getType().getY()) {
3207 throw new RSRuntimeException("Called HER2K with non-square C");
3208 }
3209 if (Trans == NO_TRANSPOSE) {
3210 if (A.getType().getY() != cdim) {
3211 throw new RSRuntimeException("Called HER2K with invalid matrices");
3212 }
3213 } else {
3214 if (A.getType().getX() != cdim) {
3215 throw new RSRuntimeException("Called HER2K with invalid matrices");
3216 }
3217 }
3218 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3219 throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
3220 }
3221 }
Miao Wangfb675a52015-05-12 18:22:20 -07003222
3223 /**
3224 * CHER2K performs one of the hermitian rank 2k operations
3225 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3226 *
3227 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3228 *
3229 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3230 * @param Trans The type of transpose applied to the operation.
3231 * @param alpha The scalar alpha.
3232 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3233 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3234 * @param beta The scalar beta.
3235 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3236 */
Tim Murray25207df2015-01-12 16:47:56 -08003237 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
3238 validateUplo(Uplo);
3239 validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
3240 int k = 0;
3241 if (Trans == NO_TRANSPOSE) {
3242 k = A.getType().getX();
3243 } else {
3244 k = A.getType().getY();
3245 }
3246 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3247 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3248 }
Miao Wangfb675a52015-05-12 18:22:20 -07003249
3250 /**
3251 * ZHER2K performs one of the hermitian rank 2k operations
3252 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3253 *
3254 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3255 *
3256 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3257 * @param Trans The type of transpose applied to the operation.
3258 * @param alpha The scalar alpha.
3259 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3260 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3261 * @param beta The scalar beta.
3262 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3263 */
Tim Murray25207df2015-01-12 16:47:56 -08003264 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
3265 validateUplo(Uplo);
3266 validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
3267 int k = 0;
3268 if (Trans == NO_TRANSPOSE) {
3269 k = A.getType().getX();
3270 } else {
3271 k = A.getType().getY();
3272 }
3273 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3274 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3275 }
3276
3277
Tim Murray9cb16a22015-04-01 11:07:16 -07003278 /**
Miao Wangd7d413a2015-07-15 11:35:28 -07003279 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
Miao Wangfb675a52015-05-12 18:22:20 -07003280 * Calculations are done in 1.10.21 fixed-point format for the final output,
3281 * just before there's a shift down to drop the fractional parts. The output
3282 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3283 * gives some headroom to avoid wrapping around on small overflows.
Miao Wang6099ee62015-06-29 17:43:03 -07003284 *
Miao Wangfb675a52015-05-12 18:22:20 -07003285 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
Miao Wang6099ee62015-06-29 17:43:03 -07003286 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
Miao Wangfb675a52015-05-12 18:22:20 -07003287 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
Miao Wang6099ee62015-06-29 17:43:03 -07003288 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
Miao Wangfb675a52015-05-12 18:22:20 -07003289 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
3290 * @param c_offset The offset for all values in matrix C.
3291 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
Tim Murray9cb16a22015-04-01 11:07:16 -07003292 **/
3293 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
3294 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
3295
Miao Wang6099ee62015-06-29 17:43:03 -07003296 if (a_offset < 0 || a_offset > 255) {
3297 throw new RSRuntimeException("Invalid a_offset passed to BNNM");
3298 }
3299 if (b_offset < 0 || b_offset > 255) {
3300 throw new RSRuntimeException("Invalid b_offset passed to BNNM");
3301 }
Tim Murray9cb16a22015-04-01 11:07:16 -07003302 int M = -1, N = -1, K = -1;
3303 M = A.getType().getY();
3304 N = B.getType().getY();
3305 K = A.getType().getX();
3306
3307
3308 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult);
3309
3310 }
Tim Murray25207df2015-01-12 16:47:56 -08003311
3312}