Blame - rs/java/android/renderscript/ScriptIntrinsicBLAS.java - android_frameworks_base

2015-05-12 18:22:20 -0700

[diff] [blame]

326

327

/**

328

* DGEMV performs one of the matrix-vector operations

329

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y

330

*

331

* Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html

332

*

333

* @param TransA The type of transpose applied to matrix A.

334

* @param alpha The scalar alpha.

335

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

336

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

337

* @param incX The increment for the elements of vector x, must be larger than zero.

338

* @param beta The scalar beta.

339

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

340

* @param incY The increment for the elements of vector y, must be larger than zero.

341

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

342

public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

343

validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);

344

int M = A.getType().getY();

345

int N = A.getType().getX();

346

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

347

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

348

349

/**

350

* CGEMV performs one of the matrix-vector operations

351

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

352

*

353

* Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html

354

*

355

* @param TransA The type of transpose applied to matrix A.

356

* @param alpha The scalar alpha.

357

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

358

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

359

* @param incX The increment for the elements of vector x, must be larger than zero.

360

* @param beta The scalar beta.

361

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

362

* @param incY The increment for the elements of vector y, must be larger than zero.

363

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

364

public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

365

validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);

366

int M = A.getType().getY();

367

int N = A.getType().getX();

368

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

369

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

370

371

/**

372

* ZGEMV performs one of the matrix-vector operations

373

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

374

*

375

* Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html

376

*

377

* @param TransA The type of transpose applied to matrix A.

378

* @param alpha The scalar alpha.

379

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

380

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

381

* @param incX The increment for the elements of vector x, must be larger than zero.

382

* @param beta The scalar beta.

383

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

384

* @param incY The increment for the elements of vector y, must be larger than zero.

385

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

386

public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

387

validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);

388

int M = A.getType().getY();

389

int N = A.getType().getX();

390

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

391

}

392

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

393

/**

394

* SGBMV performs one of the matrix-vector operations

395

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y

396

*

397

* Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html

398

*

399

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

400

* but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an

401

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

402

* for i in range(0, m):

403

* for j in range(max(0, i-kl), min(i+ku+1, n)):

404

* b[i, j-i+kl] = a[i, j]

405

*

406

* @param TransA The type of transpose applied to matrix A.

407

* @param KL The number of sub-diagonals of the matrix A.

408

* @param KU The number of super-diagonals of the matrix A.

409

* @param alpha The scalar alpha.

410

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.

411

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

412

* @param incX The increment for the elements of vector x, must be larger than zero.

413

* @param beta The scalar beta.

414

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

415

* @param incY The increment for the elements of vector y, must be larger than zero.

416

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

417

public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

418

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

419

validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);

420

if (KL < 0 || KU < 0) {

421

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

422

}

423

int M = A.getType().getY();

424

int N = A.getType().getX();

425

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);

426

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

427

428

/**

429

* DGBMV performs one of the matrix-vector operations

430

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y

431

*

432

* Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html

433

*

434

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

435

* but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an

436

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

437

* for i in range(0, m):

438

* for j in range(max(0, i-kl), min(i+ku+1, n)):

439

* b[i, j-i+kl] = a[i, j]

440

*

441

* @param TransA The type of transpose applied to matrix A.

442

* @param KL The number of sub-diagonals of the matrix A.

443

* @param KU The number of super-diagonals of the matrix A.

444

* @param alpha The scalar alpha.

445

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.

446

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

447

* @param incX The increment for the elements of vector x, must be larger than zero.

448

* @param beta The scalar beta.

449

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

450

* @param incY The increment for the elements of vector y, must be larger than zero.

451

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

452

public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

453

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

454

validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);

455

if (KL < 0 || KU < 0) {

456

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

457

}

458

int M = A.getType().getY();

459

int N = A.getType().getX();

460

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);

461

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

462

463

/**

464

* CGBMV performs one of the matrix-vector operations

465

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

466

*

467

* Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html

468

*

469

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

470

* but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an

471

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

472

* for i in range(0, m):

473

* for j in range(max(0, i-kl), min(i+ku+1, n)):

474

* b[i, j-i+kl] = a[i, j]

475

*

476

* @param TransA The type of transpose applied to matrix A.

477

* @param KL The number of sub-diagonals of the matrix A.

478

* @param KU The number of super-diagonals of the matrix A.

479

* @param alpha The scalar alpha.

480

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.

481

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

482

* @param incX The increment for the elements of vector x, must be larger than zero.

483

* @param beta The scalar beta.

484

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

485

* @param incY The increment for the elements of vector y, must be larger than zero.

486

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

487

public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

488

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

489

validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);

490

if (KL < 0 || KU < 0) {

491

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

492

}

493

int M = A.getType().getY();

494

int N = A.getType().getX();

495

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);

496

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

497

498

/**

499

* ZGBMV performs one of the matrix-vector operations

500

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

501

*

502

* Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html

503

*

504

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

505

* but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an

506

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

507

* for i in range(0, m):

508

* for j in range(max(0, i-kl), min(i+ku+1, n)):

509

* b[i, j-i+kl] = a[i, j]

510

*

511

* @param TransA The type of transpose applied to matrix A.

512

* @param KL The number of sub-diagonals of the matrix A.

513

* @param KU The number of super-diagonals of the matrix A.

514

* @param alpha The scalar alpha.

515

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.

516

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

517

* @param incX The increment for the elements of vector x, must be larger than zero.

518

* @param beta The scalar beta.

519

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

520

* @param incY The increment for the elements of vector y, must be larger than zero.

521

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

522

public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

523

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

524

validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);

525

if (KL < 0 || KU < 0) {

526

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

527

}

528

int M = A.getType().getY();

529

int N = A.getType().getX();

530

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);

531

}

532

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

533

static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

534

validateTranspose(TransA);

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

535

validateUplo(Uplo);

536

validateDiag(Diag);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

537

int N = A.getType().getY();

538

if (A.getType().getX() != N) {

539

throw new RSRuntimeException("A must be a square matrix for TRMV");

540

}

541

if (!A.getType().getElement().isCompatible(e) ||

542

!X.getType().getElement().isCompatible(e)) {

543

throw new RSRuntimeException("Called BLAS with wrong Element type");

544

}

545

if (X.getType().getY() > 1) {

546

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

}

if (incX <= 0) {

throw new RSRuntimeException("Vector increments must be greater than 0");

551

}

552

int expectedXDim = 1 + (N - 1) * incX;

553

if (X.getType().getX() != expectedXDim) {

554

throw new RSRuntimeException("Incorrect vector dimensions for TRMV");

}

}

static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

559

validateTranspose(TransA);

560

validateUplo(Uplo);

561

validateDiag(Diag);

562

if (!Ap.getType().getElement().isCompatible(e) ||

563

!X.getType().getElement().isCompatible(e)) {

564

throw new RSRuntimeException("Called BLAS with wrong Element type");

565

}

566

if (X.getType().getY() > 1) {

567

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

568

}

569

570

if (Ap.getType().getY() > 1) {

571

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

572

}

573

574

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

575

//is it really doing anything?

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

576

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

577

throw new RSRuntimeException("Invalid dimension for Ap");

578

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

579

if (incX <= 0) {

580

throw new RSRuntimeException("Vector increments must be greater than 0");

581

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

582

int expectedXDim = 1 + (N - 1) * incX;

583

if (X.getType().getX() != expectedXDim) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

584

throw new RSRuntimeException("Incorrect vector dimensions for TPMV");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

return N;

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

590

/**

591

* STRMV performs one of the matrix-vector operations

592

* x := A*x or x := A**T*x

593

*

594

* Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html

595

*

596

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

597

* @param TransA The type of transpose applied to matrix A.

598

* @param Diag Specifies whether or not A is unit triangular.

599

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

600

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

601

* @param incX The increment for the elements of vector x, must be larger than zero.

602

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

603

public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

604

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

605

int N = A.getType().getY();

606

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

607

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

608

609

/**

610

* DTRMV performs one of the matrix-vector operations

611

* x := A*x or x := A**T*x

612

*

613

* Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html

614

*

615

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

616

* @param TransA The type of transpose applied to matrix A.

617

* @param Diag Specifies whether or not A is unit triangular.

618

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

619

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

620

* @param incX The increment for the elements of vector x, must be larger than zero.

621

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

622

public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

623

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

624

int N = A.getType().getY();

625

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

626

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

627

628

/**

629

* CTRMV performs one of the matrix-vector operations

630

* x := A*x or x := A**T*x or x := A**H*x

631

*

632

* Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html

633

*

634

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

635

* @param TransA The type of transpose applied to matrix A.

636

* @param Diag Specifies whether or not A is unit triangular.

637

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

638

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

639

* @param incX The increment for the elements of vector x, must be larger than zero.

640

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

641

public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

642

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

643

int N = A.getType().getY();

644

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

645

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

646

647

/**

648

* ZTRMV performs one of the matrix-vector operations

649

* x := A*x or x := A**T*x or x := A**H*x

650

*

651

* Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html

652

*

653

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

654

* @param TransA The type of transpose applied to matrix A.

655

* @param Diag Specifies whether or not A is unit triangular.

656

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

657

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

658

* @param incX The increment for the elements of vector x, must be larger than zero.

659

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

660

public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

661

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

662

int N = A.getType().getY();

663

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

664

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

665

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

666

/**

667

* STBMV performs one of the matrix-vector operations

668

* x := A*x or x := A**T*x

669

*

670

* Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html

671

*

672

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

673

* but only the region N*(K+1) will be referenced. The following subroutine can is an

674

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

675

* for i in range(0, n):

676

* for j in range(i, min(i+k+1, n)):

677

* b[i, j-i] = a[i, j]

678

*

679

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

680

* @param TransA The type of transpose applied to matrix A.

681

* @param Diag Specifies whether or not A is unit triangular.

682

* @param K The number of off-diagonals of the matrix A

683

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

684

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

685

* @param incX The increment for the elements of vector x, must be larger than zero.

686

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

687

public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

688

// TBMV has the same requirements as TRMV + K >= 0

689

if (K < 0) {

690

throw new RSRuntimeException("K must be greater than or equal to 0");

691

}

692

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

693

int N = A.getType().getY();

694

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

695

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

696

697

/**

698

* DTBMV performs one of the matrix-vector operations

699

* x := A*x or x := A**T*x

700

*

701

* Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html

702

*

703

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

704

* but only the region N*(K+1) will be referenced. The following subroutine can is an

705

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

706

* for i in range(0, n):

707

* for j in range(i, min(i+k+1, n)):

708

* b[i, j-i] = a[i, j]

709

*

710

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

711

* @param TransA The type of transpose applied to matrix A.

712

* @param Diag Specifies whether or not A is unit triangular.

713

* @param K The number of off-diagonals of the matrix A

714

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

715

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

716

* @param incX The increment for the elements of vector x, must be larger than zero.

717

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

718

public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

719

// TBMV has the same requirements as TRMV + K >= 0

720

if (K < 0) {

721

throw new RSRuntimeException("K must be greater than or equal to 0");

722

}

723

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

724

int N = A.getType().getY();

725

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

726

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

727

728

/**

729

* CTBMV performs one of the matrix-vector operations

730

* x := A*x or x := A**T*x or x := A**H*x

731

*

732

* Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html

733

*

734

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

735

* but only the region N*(K+1) will be referenced. The following subroutine can is an

736

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

737

* for i in range(0, n):

738

* for j in range(i, min(i+k+1, n)):

739

* b[i, j-i] = a[i, j]

740

*

741

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

742

* @param TransA The type of transpose applied to matrix A.

743

* @param Diag Specifies whether or not A is unit triangular.

744

* @param K The number of off-diagonals of the matrix A

745

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

746

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

747

* @param incX The increment for the elements of vector x, must be larger than zero.

748

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

749

public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

750

// TBMV has the same requirements as TRMV + K >= 0

751

if (K < 0) {

752

throw new RSRuntimeException("K must be greater than or equal to 0");

753

}

754

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

755

int N = A.getType().getY();

756

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

757

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

758

759

/**

760

* ZTBMV performs one of the matrix-vector operations

761

* x := A*x or x := A**T*x or x := A**H*x

762

*

763

* Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html

764

*

765

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

766

* but only the region N*(K+1) will be referenced. The following subroutine can is an

767

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

768

* for i in range(0, n):

769

* for j in range(i, min(i+k+1, n)):

770

* b[i, j-i] = a[i, j]

771

*

772

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

773

* @param TransA The type of transpose applied to matrix A.

774

* @param Diag Specifies whether or not A is unit triangular.

775

* @param K The number of off-diagonals of the matrix A

776

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

777

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

778

* @param incX The increment for the elements of vector x, must be larger than zero.

779

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

780

public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

781

// TBMV has the same requirements as TRMV + K >= 0

782

if (K < 0) {

783

throw new RSRuntimeException("K must be greater than or equal to 0");

784

}

785

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

786

int N = A.getType().getY();

787

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

788

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

789

790

/**

791

* STPMV performs one of the matrix-vector operations

792

* x := A*x or x := A**T*x

793

*

794

* Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html

795

*

796

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

797

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

798

* 'a' to packed matrix 'b'.

799

* k = 0

800

* for i in range(0, n):

801

* for j in range(i, n):

802

* b[k++] = a[i, j]

803

*

804

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

805

* @param TransA The type of transpose applied to matrix A.

806

* @param Diag Specifies whether or not A is unit triangular.

807

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.

808

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

809

* @param incX The increment for the elements of vector x, must be larger than zero.

810

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

811

public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

812

int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);

813

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

814

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

815

816

/**

817

* DTPMV performs one of the matrix-vector operations

818

* x := A*x or x := A**T*x

819

*

820

* Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html

821

*

822

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

823

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

824

* 'a' to packed matrix 'b'.

825

* k = 0

826

* for i in range(0, n):

827

* for j in range(i, n):

828

* b[k++] = a[i, j]

829

*

830

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

831

* @param TransA The type of transpose applied to matrix A.

832

* @param Diag Specifies whether or not A is unit triangular.

833

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.

834

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

835

* @param incX The increment for the elements of vector x, must be larger than zero.

836

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

837

public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

838

int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);

839

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

840

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

841

842

/**

843

* CTPMV performs one of the matrix-vector operations

844

* x := A*x or x := A**T*x or x := A**H*x

845

*

846

* Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html

847

*

848

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

849

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

850

* 'a' to packed matrix 'b'.

851

* k = 0

852

* for i in range(0, n):

853

* for j in range(i, n):

854

* b[k++] = a[i, j]

855

*

856

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

857

* @param TransA The type of transpose applied to matrix A.

858

* @param Diag Specifies whether or not A is unit triangular.

859

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.

860

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

861

* @param incX The increment for the elements of vector x, must be larger than zero.

862

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

863

public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

864

int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

865

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

866

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

867

868

/**

869

* ZTPMV performs one of the matrix-vector operations

870

* x := A*x or x := A**T*x or x := A**H*x

871

*

872

* Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html

873

*

874

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

875

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

876

* 'a' to packed matrix 'b'.

877

* k = 0

878

* for i in range(0, n):

879

* for j in range(i, n):

880

* b[k++] = a[i, j]

881

*

882

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

883

* @param TransA The type of transpose applied to matrix A.

884

* @param Diag Specifies whether or not A is unit triangular.

885

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.

886

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

887

* @param incX The increment for the elements of vector x, must be larger than zero.

888

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

889

public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

890

int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

891

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

892

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

893

894

/**

895

* STRSV solves one of the systems of equations

896

* A*x = b or A**T*x = b

897

*

898

* Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html

899

*

900

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

901

* @param TransA The type of transpose applied to matrix A.

902

* @param Diag Specifies whether or not A is unit triangular.

903

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

904

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

905

* @param incX The increment for the elements of vector x, must be larger than zero.

906

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

907

public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

908

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

909

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

910

int N = A.getType().getY();

911

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

912

913

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

914

915

/**

916

* DTRSV solves one of the systems of equations

917

* A*x = b or A**T*x = b

918

*

919

* Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html

920

*

921

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

922

* @param TransA The type of transpose applied to matrix A.

923

* @param Diag Specifies whether or not A is unit triangular.

924

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

925

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

926

* @param incX The increment for the elements of vector x, must be larger than zero.

927

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

928

public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

929

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

930

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

931

int N = A.getType().getY();

932

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

933

934

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

935

936

/**

937

* CTRSV solves one of the systems of equations

938

* A*x = b or A**T*x = b or A**H*x = b

939

*

940

* Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html

941

*

942

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

943

* @param TransA The type of transpose applied to matrix A.

944

* @param Diag Specifies whether or not A is unit triangular.

945

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

946

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

947

* @param incX The increment for the elements of vector x, must be larger than zero.

948

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

949

public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

950

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

951

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

952

int N = A.getType().getY();

953

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

954

955

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

956

957

/**

958

* ZTRSV solves one of the systems of equations

959

* A*x = b or A**T*x = b or A**H*x = b

960

*

961

* Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html

962

*

963

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

964

* @param TransA The type of transpose applied to matrix A.

965

* @param Diag Specifies whether or not A is unit triangular.

966

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

967

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

968

* @param incX The increment for the elements of vector x, must be larger than zero.

969

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

970

public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

971

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

972

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

973

int N = A.getType().getY();

974

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

975

976

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

977

978

/**

979

* STBSV solves one of the systems of equations

980

* A*x = b or A**T*x = b

981

*

982

* Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html

983

*

984

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

985

* but only the region N*(K+1) will be referenced. The following subroutine can is an

986

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

987

* for i in range(0, n):

988

* for j in range(i, min(i+k+1, n)):

989

* b[i, j-i] = a[i, j]

990

*

991

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

992

* @param TransA The type of transpose applied to matrix A.

993

* @param Diag Specifies whether or not A is unit triangular.

994

* @param K The number of off-diagonals of the matrix A

995

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

996

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

997

* @param incX The increment for the elements of vector x, must be larger than zero.

998

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

999

public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1000

// TBSV is the same as TRMV + K >= 0

1001

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1002

int N = A.getType().getY();

1003

if (K < 0) {

1004

throw new RSRuntimeException("Number of diagonals must be positive");

1005

}

1006

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1007

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1008

1009

/**

1010

* DTBSV solves one of the systems of equations

1011

* A*x = b or A**T*x = b

1012

*

1013

* Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html

1014

*

1015

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1016

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1017

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1018

* for i in range(0, n):

1019

* for j in range(i, min(i+k+1, n)):

1020

* b[i, j-i] = a[i, j]

1021

*

1022

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1023

* @param TransA The type of transpose applied to matrix A.

1024

* @param Diag Specifies whether or not A is unit triangular.

1025

* @param K The number of off-diagonals of the matrix A

1026

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1027

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1028

* @param incX The increment for the elements of vector x, must be larger than zero.

1029

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1030

public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1031

// TBSV is the same as TRMV + K >= 0

1032

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1033

int N = A.getType().getY();

1034

if (K < 0) {

1035

throw new RSRuntimeException("Number of diagonals must be positive");

1036

}

1037

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1038

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1039

1040

/**

1041

* CTBSV solves one of the systems of equations

1042

* A*x = b or A**T*x = b or A**H*x = b

1043

*

1044

* Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html

1045

*

1046

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1047

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1048

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1049

* for i in range(0, n):

1050

* for j in range(i, min(i+k+1, n)):

1051

* b[i, j-i] = a[i, j]

1052

*

1053

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1054

* @param TransA The type of transpose applied to matrix A.

1055

* @param Diag Specifies whether or not A is unit triangular.

1056

* @param K The number of off-diagonals of the matrix A

1057

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1058

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1059

* @param incX The increment for the elements of vector x, must be larger than zero.

1060

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1061

public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1062

// TBSV is the same as TRMV + K >= 0

1063

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1064

int N = A.getType().getY();

1065

if (K < 0) {

1066

throw new RSRuntimeException("Number of diagonals must be positive");

1067

}

1068

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

1069

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1070

1071

/**

1072

* ZTBSV solves one of the systems of equations

1073

* A*x = b or A**T*x = b or A**H*x = b

1074

*

1075

* Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html

1076

*

1077

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1078

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1079

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1080

* for i in range(0, n):

1081

* for j in range(i, min(i+k+1, n)):

1082

* b[i, j-i] = a[i, j]

1083

*

1084

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1085

* @param TransA The type of transpose applied to matrix A.

1086

* @param Diag Specifies whether or not A is unit triangular.

1087

* @param K The number of off-diagonals of the matrix A

1088

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

1089

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

1090

* @param incX The increment for the elements of vector x, must be larger than zero.

1091

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1092

public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1093

// TBSV is the same as TRMV + K >= 0

1094

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1095

int N = A.getType().getY();

1096

if (K < 0) {

1097

throw new RSRuntimeException("Number of diagonals must be positive");

1098

}

1099

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

1100

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1101

1102

/**

1103

* STPSV solves one of the systems of equations

1104

* A*x = b or A**T*x = b

1105

*

1106

* Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html

1107

*

1108

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1109

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1110

* 'a' to packed matrix 'b'.

1111

* k = 0

1112

* for i in range(0, n):

1113

* for j in range(i, n):

1114

* b[k++] = a[i, j]

1115

*

1116

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1117

* @param TransA The type of transpose applied to matrix A.

1118

* @param Diag Specifies whether or not A is unit triangular.

1119

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.

1120

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1121

* @param incX The increment for the elements of vector x, must be larger than zero.

1122

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1123

public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1124

// TPSV is same as TPMV

1125

int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);

1126

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1127

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1128

1129

/**

1130

* DTPSV solves one of the systems of equations

1131

* A*x = b or A**T*x = b

1132

*

1133

* Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html

1134

*

1135

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1136

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1137

* 'a' to packed matrix 'b'.

1138

* k = 0

1139

* for i in range(0, n):

1140

* for j in range(i, n):

1141

* b[k++] = a[i, j]

1142

*

1143

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1144

* @param TransA The type of transpose applied to matrix A.

1145

* @param Diag Specifies whether or not A is unit triangular.

1146

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.

1147

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1148

* @param incX The increment for the elements of vector x, must be larger than zero.

1149

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1150

public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1151

// TPSV is same as TPMV

1152

int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);

1153

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1154

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1155

1156

/**

1157

* CTPSV solves one of the systems of equations

1158

* A*x = b or A**T*x = b or A**H*x = b

1159

*

1160

* Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html

1161

*

1162

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1163

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1164

* 'a' to packed matrix 'b'.

1165

* k = 0

1166

* for i in range(0, n):

1167

* for j in range(i, n):

1168

* b[k++] = a[i, j]

1169

*

1170

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1171

* @param TransA The type of transpose applied to matrix A.

1172

* @param Diag Specifies whether or not A is unit triangular.

1173

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.

1174

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1175

* @param incX The increment for the elements of vector x, must be larger than zero.

1176

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1177

public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1178

// TPSV is same as TPMV

1179

int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

1180

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

1181

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1182

1183

/**

1184

* ZTPSV solves one of the systems of equations

1185

* A*x = b or A**T*x = b or A**H*x = b

1186

*

1187

* Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html

1188

*

1189

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1190

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1191

* 'a' to packed matrix 'b'.

1192

* k = 0

1193

* for i in range(0, n):

1194

* for j in range(i, n):

1195

* b[k++] = a[i, j]

1196

*

1197

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1198

* @param TransA The type of transpose applied to matrix A.

1199

* @param Diag Specifies whether or not A is unit triangular.

1200

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.

1201

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

1202

* @param incX The increment for the elements of vector x, must be larger than zero.

1203

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1204

public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1205

// TPSV is same as TPMV

1206

int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

1207

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

}

/**

* Level 2, S and D only

1212

*/

1213

static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {

1214

validateUplo(Uplo);

1215

int N = A.getType().getY();

1216

if (A.getType().getX() != N) {

1217

throw new RSRuntimeException("A must be a square matrix for SYMV");

1218

}

1219

if (!A.getType().getElement().isCompatible(e) ||

1220

!X.getType().getElement().isCompatible(e) ||

1221

!Y.getType().getElement().isCompatible(e) ) {

1222

throw new RSRuntimeException("Called BLAS with wrong Element type");

1223

}

1224

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1225

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1226

}

1227

1228

if (incX <= 0 || incY <= 0) {

1229

throw new RSRuntimeException("Vector increments must be greater than 0");

1230

}

1231

int expectedXDim = 1 + (N - 1) * incX;

1232

if (X.getType().getX() != expectedXDim) {

1233

throw new RSRuntimeException("Incorrect vector dimensions for SYMV");

1234

}

1235

int expectedYDim = 1 + (N - 1) * incY;

1236

if (Y.getType().getX() != expectedYDim) {

1237

throw new RSRuntimeException("Incorrect vector dimensions for SYMV");

}

return N;

}

static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {

1242

validateUplo(Uplo);

1243

if (!Ap.getType().getElement().isCompatible(e) ||

1244

!X.getType().getElement().isCompatible(e) ||

1245

!Y.getType().getElement().isCompatible(e)) {

1246

throw new RSRuntimeException("Called BLAS with wrong Element type");

1247

}

1248

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1249

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1250

}

1251

1252

if (Ap.getType().getY() > 1) {

1253

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

1254

}

1255

1256

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

1257

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

1258

throw new RSRuntimeException("Invalid dimension for Ap");

1259

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1260

if (incX <= 0 || incY <= 0) {

1261

throw new RSRuntimeException("Vector increments must be greater than 0");

1262

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1263

int expectedXDim = 1 + (N - 1) * incX;

1264

if (X.getType().getX() != expectedXDim) {

1265

throw new RSRuntimeException("Incorrect vector dimensions for SPMV");

1266

}

1267

int expectedYDim = 1 + (N - 1) * incY;

1268

if (Y.getType().getX() != expectedYDim) {

1269

throw new RSRuntimeException("Incorrect vector dimensions for SPMV");

}

return N;

}

static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

1275

if (!A.getType().getElement().isCompatible(e) ||

1276

!X.getType().getElement().isCompatible(e) ||

1277

!Y.getType().getElement().isCompatible(e) ) {

1278

throw new RSRuntimeException("Called BLAS with wrong Element type");

1279

}

1280

1281

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1282

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1283

}

1284

1285

int M = A.getType().getY();

1286

int N = A.getType().getX();

1287

1288

if (N < 1 || M < 1) {

1289

throw new RSRuntimeException("M and N must be 1 or greater for GER");

1290

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1291

if (incX <= 0 || incY <= 0) {

1292

throw new RSRuntimeException("Vector increments must be greater than 0");

1293

}

1294

int expectedXDim = 1 + (M - 1) * incX;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1295

if (X.getType().getX() != expectedXDim) {

1296

throw new RSRuntimeException("Incorrect vector dimensions for GER");

1297

}

1298

int expectedYDim = 1 + (N - 1) * incY;

1299

if (Y.getType().getX() != expectedYDim) {

1300

throw new RSRuntimeException("Incorrect vector dimensions for GER");

}

}

static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {

1306

validateUplo(Uplo);

1307

if (!A.getType().getElement().isCompatible(e) ||

1308

!X.getType().getElement().isCompatible(e)) {

1309

throw new RSRuntimeException("Called BLAS with wrong Element type");

1310

}

1311

1312

int N = A.getType().getX();

1313

1314

if (X.getType().getY() > 1) {

1315

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1316

}

1317

if (N != A.getType().getY()) {

1318

throw new RSRuntimeException("A must be a symmetric matrix");

1319

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1320

if (incX <= 0) {

1321

throw new RSRuntimeException("Vector increments must be greater than 0");

1322

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1323

int expectedXDim = 1 + (N - 1) * incX;

1324

if (X.getType().getX() != expectedXDim) {

1325

throw new RSRuntimeException("Incorrect vector dimensions for SYR");

}

return N;

}

static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {

1330

validateUplo(Uplo);

1331

if (!Ap.getType().getElement().isCompatible(e) ||

1332

!X.getType().getElement().isCompatible(e)) {

1333

throw new RSRuntimeException("Called BLAS with wrong Element type");

1334

}

1335

if (X.getType().getY() > 1) {

1336

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1337

}

1338

1339

if (Ap.getType().getY() > 1) {

1340

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

1341

}

1342

1343

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

1344

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

1345

throw new RSRuntimeException("Invalid dimension for Ap");

1346

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1347

if (incX <= 0) {

1348

throw new RSRuntimeException("Vector increments must be greater than 0");

1349

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1350

int expectedXDim = 1 + (N - 1) * incX;

1351

if (X.getType().getX() != expectedXDim) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1352

throw new RSRuntimeException("Incorrect vector dimensions for SPR");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

return N;

}

static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

1359

validateUplo(Uplo);

1360

if (!A.getType().getElement().isCompatible(e) ||

1361

!X.getType().getElement().isCompatible(e) ||

1362

!Y.getType().getElement().isCompatible(e)) {

1363

throw new RSRuntimeException("Called BLAS with wrong Element type");

1364

}

1365

1366

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1367

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1368

}

1369

1370

int N = A.getType().getX();

1371

1372

if (N != A.getType().getY()) {

1373

throw new RSRuntimeException("A must be a symmetric matrix");

1374

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1375

if (incX <= 0 || incY <= 0) {

1376

throw new RSRuntimeException("Vector increments must be greater than 0");

1377

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1378

int expectedXDim = 1 + (N - 1) * incX;

1379

int expectedYDim = 1 + (N - 1) * incY;

1380

if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {

1381

throw new RSRuntimeException("Incorrect vector dimensions for SYR");

}

return N;

}

static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

1387

validateUplo(Uplo);

1388

if (!Ap.getType().getElement().isCompatible(e) ||

1389

!X.getType().getElement().isCompatible(e) ||

1390

!Y.getType().getElement().isCompatible(e)) {

1391

throw new RSRuntimeException("Called BLAS with wrong Element type");

1392

}

1393

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1394

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1395

}

1396

1397

if (Ap.getType().getY() > 1) {

1398

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

1399

}

1400

1401

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

1402

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

1403

throw new RSRuntimeException("Invalid dimension for Ap");

1404

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1405

if (incX <= 0 || incY <= 0) {

1406

throw new RSRuntimeException("Vector increments must be greater than 0");

1407

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1408

int expectedXDim = 1 + (N - 1) * incX;

1409

int expectedYDim = 1 + (N - 1) * incY;

1410

if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1411

throw new RSRuntimeException("Incorrect vector dimensions for SPR2");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

return N;

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1417

/**

1418

* SSYMV performs the matrix-vector operation

1419

* y := alpha*A*x + beta*y

1420

*

1421

* Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html

1422

*

1423

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1424

* @param alpha The scalar alpha.

1425

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1426

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1427

* @param incX The increment for the elements of vector x, must be larger than zero.

1428

* @param beta The scalar beta.

1429

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1430

* @param incY The increment for the elements of vector y, must be larger than zero.

1431

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1432

public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1433

int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);

1434

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1435

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1436

1437

/**

1438

* SSBMV performs the matrix-vector operation

1439

* y := alpha*A*x + beta*y

1440

*

1441

* Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html

1442

*

1443

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1444

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1445

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1446

* for i in range(0, n):

1447

* for j in range(i, min(i+k+1, n)):

1448

* b[i, j-i] = a[i, j]

1449

*

1450

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

1451

* @param K The number of off-diagonals of the matrix A

1452

* @param alpha The scalar alpha.

1453

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1454

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1455

* @param incX The increment for the elements of vector x, must be larger than zero.

1456

* @param beta The scalar beta.

1457

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1458

* @param incY The increment for the elements of vector y, must be larger than zero.

1459

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1460

public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1461

// SBMV is the same as SYMV + K >= 0

1462

if (K < 0) {

1463

throw new RSRuntimeException("K must be greater than or equal to 0");

1464

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1465

int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);

1466

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1467

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1468

1469

/**

1470

* SSPMV performs the matrix-vector operation

1471

* y := alpha*A*x + beta*y

1472

*

1473

* Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html

1474

*

1475

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1476

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1477

* 'a' to packed matrix 'b'.

1478

* k = 0

1479

* for i in range(0, n):

1480

* for j in range(i, n):

1481

* b[k++] = a[i, j]

1482

*

1483

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

1484

* @param alpha The scalar alpha.

1485

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.

1486

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1487

* @param incX The increment for the elements of vector x, must be larger than zero.

1488

* @param beta The scalar beta.

1489

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1490

* @param incY The increment for the elements of vector y, must be larger than zero.

1491

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1492

public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1493

int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);

1494

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1495

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1496

1497

/**

1498

* SGER performs the rank 1 operation

1499

* A := alpha*x*y**T + A

1500

*

1501

* Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html

1502

*

1503

* @param alpha The scalar alpha.

1504

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1505

* @param incX The increment for the elements of vector x, must be larger than zero.

1506

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1507

* @param incY The increment for the elements of vector y, must be larger than zero.

1508

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1509

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1510

public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1511

int M = A.getType().getY();

1512

int N = A.getType().getX();

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1513

validateGER(Element.F32(mRS), X, incX, Y, incY, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1514

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);

1515

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1516

1517

/**

1518

* SSYR performs the rank 1 operation

1519

* A := alpha*x*x**T + A

1520

*

1521

* Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html

1522

*

1523

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1524

* @param alpha The scalar alpha.

1525

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1526

* @param incX The increment for the elements of vector x, must be larger than zero.

1527

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1528

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1529

public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1530

int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);

1531

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1532

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1533

1534

/**

1535

* SSPR performs the rank 1 operation

1536

* A := alpha*x*x**T + A

1537

*

1538

* Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html

1539

*

1540

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1541

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1542

* 'a' to packed matrix 'b'.

1543

* k = 0

1544

* for i in range(0, n):

1545

* for j in range(i, n):

1546

* b[k++] = a[i, j]

1547

*

1548

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1549

* @param alpha The scalar alpha.

1550

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1551

* @param incX The increment for the elements of vector x, must be larger than zero.

1552

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.

1553

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1554

public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1555

int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);

1556

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1557

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1558

1559

/**

1560

* SSYR2 performs the symmetric rank 2 operation

1561

* A := alpha*x*y**T + alpha*y*x**T + A

1562

*

1563

* Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html

1564

*

1565

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1566

* @param alpha The scalar alpha.

1567

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1568

* @param incX The increment for the elements of vector x, must be larger than zero.

1569

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1570

* @param incY The increment for the elements of vector y, must be larger than zero.

1571

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1572

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1573

public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1574

int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);

1575

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);

1576

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1577

1578

/**

1579

* SSPR2 performs the symmetric rank 2 operation

1580

* A := alpha*x*y**T + alpha*y*x**T + A

1581

*

1582

* Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html

1583

*

1584

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1585

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1586

* 'a' to packed matrix 'b'.

1587

* k = 0

1588

* for i in range(0, n):

1589

* for j in range(i, n):

1590

* b[k++] = a[i, j]

1591

*

1592

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1593

* @param alpha The scalar alpha.

1594

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1595

* @param incX The increment for the elements of vector x, must be larger than zero.

1596

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1597

* @param incY The increment for the elements of vector y, must be larger than zero.

1598

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.

1599

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1600

public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1601

int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);

1602

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);

1603

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1604

1605

/**

1606

* DSYMV performs the matrix-vector operation

1607

* y := alpha*A*x + beta*y

1608

*

1609

* Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html

1610

*

1611

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1612

* @param alpha The scalar alpha.

1613

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1614

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1615

* @param incX The increment for the elements of vector x, must be larger than zero.

1616

* @param beta The scalar beta.

1617

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1618

* @param incY The increment for the elements of vector y, must be larger than zero.

1619

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1620

public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1621

int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);

1622

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1623

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1624

1625

/**

1626

* DSBMV performs the matrix-vector operation

1627

* y := alpha*A*x + beta*y

1628

*

1629

* Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html

1630

*

1631

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1632

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1633

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1634

* for i in range(0, n):

1635

* for j in range(i, min(i+k+1, n)):

1636

* b[i, j-i] = a[i, j]

1637

*

1638

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

1639

* @param K The number of off-diagonals of the matrix A

1640

* @param alpha The scalar alpha.

1641

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1642

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1643

* @param incX The increment for the elements of vector x, must be larger than zero.

1644

* @param beta The scalar beta.

1645

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1646

* @param incY The increment for the elements of vector y, must be larger than zero.

1647

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1648

public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1649

// SBMV is the same as SYMV + K >= 0

1650

if (K < 0) {

1651

throw new RSRuntimeException("K must be greater than or equal to 0");

1652

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1653

int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);

1654

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1655

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1656

1657

/**

1658

* DSPMV performs the matrix-vector operation

1659

* y := alpha*A*x + beta*y

1660

*

1661

* Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html

1662

*

1663

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1664

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1665

* 'a' to packed matrix 'b'.

1666

* k = 0

1667

* for i in range(0, n):

1668

* for j in range(i, n):

1669

* b[k++] = a[i, j]

1670

*

1671

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

1672

* @param alpha The scalar alpha.

1673

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.

1674

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1675

* @param incX The increment for the elements of vector x, must be larger than zero.

1676

* @param beta The scalar beta.

1677

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1678

* @param incY The increment for the elements of vector y, must be larger than zero.

1679

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1680

public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1681

int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);

1682

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1683

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1684

1685

/**

1686

* DGER performs the rank 1 operation

1687

* A := alpha*x*y**T + A

1688

*

1689

* Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html

1690

*

1691

* @param alpha The scalar alpha.

1692

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1693

* @param incX The increment for the elements of vector x, must be larger than zero.

1694

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1695

* @param incY The increment for the elements of vector y, must be larger than zero.

1696

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1697

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1698

public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1699

int M = A.getType().getY();

1700

int N = A.getType().getX();

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1701

validateGER(Element.F64(mRS), X, incX, Y, incY, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1702

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);

1703

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1704

1705

/**

1706

* DSYR performs the rank 1 operation

1707

* A := alpha*x*x**T + A

1708

*

1709

* Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html

1710

*

1711

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1712

* @param alpha The scalar alpha.

1713

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1714

* @param incX The increment for the elements of vector x, must be larger than zero.

1715

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1716

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1717

public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1718

int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);

1719

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1720

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1721

1722

/**

1723

* DSPR performs the rank 1 operation

1724

* A := alpha*x*x**T + A

1725

*

1726

* Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html

1727

*

1728

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1729

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1730

* 'a' to packed matrix 'b'.

1731

* k = 0

1732

* for i in range(0, n):

1733

* for j in range(i, n):

1734

* b[k++] = a[i, j]

1735

*

1736

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1737

* @param alpha The scalar alpha.

1738

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1739

* @param incX The increment for the elements of vector x, must be larger than zero.

1740

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.

1741

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1742

public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1743

int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);

1744

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1745

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1746

1747

/**

1748

* DSYR2 performs the symmetric rank 2 operation

1749

* A := alpha*x*y**T + alpha*y*x**T + A

1750

*

1751

* Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html

1752

*

1753

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1754

* @param alpha The scalar alpha.

1755

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1756

* @param incX The increment for the elements of vector x, must be larger than zero.

1757

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1758

* @param incY The increment for the elements of vector y, must be larger than zero.

1759

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1760

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1761

public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1762

int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);

1763

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);

1764

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1765

1766

/**

1767

* DSPR2 performs the symmetric rank 2 operation

1768

* A := alpha*x*y**T + alpha*y*x**T + A

1769

*

1770

* Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html

1771

*

1772

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1773

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1774

* 'a' to packed matrix 'b'.

1775

* k = 0

1776

* for i in range(0, n):

1777

* for j in range(i, n):

1778

* b[k++] = a[i, j]

1779

*

1780

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1781

* @param alpha The scalar alpha.

1782

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1783

* @param incX The increment for the elements of vector x, must be larger than zero.

1784

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1785

* @param incY The increment for the elements of vector y, must be larger than zero.

1786

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.

1787

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1788

public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1789

int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);

1790

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);

}

/**

* Level 2, C and Z only

1796

*/

1797

1798

static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

1799

if (!A.getType().getElement().isCompatible(e) ||

1800

!X.getType().getElement().isCompatible(e) ||

1801

!Y.getType().getElement().isCompatible(e)) {

1802

throw new RSRuntimeException("Called BLAS with wrong Element type");

1803

}

1804

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1805

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1806

}

1807

1808

int M = A.getType().getY();

1809

int N = A.getType().getX();

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1810

if (incX <= 0 || incY <= 0) {

1811

throw new RSRuntimeException("Vector increments must be greater than 0");

1812

}

1813

int expectedXDim = 1 + (M - 1) * incX;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1814

if (X.getType().getX() != expectedXDim) {

1815

throw new RSRuntimeException("Incorrect vector dimensions for GERU");

1816

}

1817

int expectedYDim = 1 + (N - 1) * incY;

1818

if (Y.getType().getX() != expectedYDim) {

1819

throw new RSRuntimeException("Incorrect vector dimensions for GERU");

}

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1824

/**

1825

* CHEMV performs the matrix-vector operation

1826

* y := alpha*A*x + beta*y

1827

*

1828

* Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html

1829

*

1830

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1831

* @param alpha The scalar alpha.

1832

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1833

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1834

* @param incX The increment for the elements of vector x, must be larger than zero.

1835

* @param beta The scalar beta.

1836

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1837

* @param incY The increment for the elements of vector y, must be larger than zero.

1838

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1839

public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1840

// HEMV is the same as SYR2 validation-wise

1841

int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);

1842

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

1843

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1844

1845

/**

1846

* CHBMV performs the matrix-vector operation

1847

* y := alpha*A*x + beta*y

1848

*

1849

* Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html

1850

*

1851

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1852

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1853

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1854

* for i in range(0, n):

1855

* for j in range(i, min(i+k+1, n)):

1856

* b[i, j-i] = a[i, j]

1857

*

1858

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

1859

* @param K The number of off-diagonals of the matrix A

1860

* @param alpha The scalar alpha.

1861

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1862

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1863

* @param incX The increment for the elements of vector x, must be larger than zero.

1864

* @param beta The scalar beta.

1865

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1866

* @param incY The increment for the elements of vector y, must be larger than zero.

1867

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1868

public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1869

// HBMV is the same as SYR2 validation-wise

1870

int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);

1871

if (K < 0) {

1872

throw new RSRuntimeException("K must be 0 or greater for HBMV");

1873

}

1874

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

1875

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1876

1877

/**

1878

* CHPMV performs the matrix-vector operation

1879

* y := alpha*A*x + beta*y

1880

*

1881

* Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html

1882

*

1883

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1884

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1885

* 'a' to packed matrix 'b'.

1886

* k = 0

1887

* for i in range(0, n):

1888

* for j in range(i, n):

1889

* b[k++] = a[i, j]

1890

*

1891

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

1892

* @param alpha The scalar alpha.

1893

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1894

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1895

* @param incX The increment for the elements of vector x, must be larger than zero.

1896

* @param beta The scalar beta.

1897

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1898

* @param incY The increment for the elements of vector y, must be larger than zero.

1899

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1900

public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1901

// HPMV is the same as SPR2

1902

int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);

1903

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

1904

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1905

1906

/**

1907

* CGERU performs the rank 1 operation

1908

* A := alpha*x*y**T + A

1909

*

1910

* Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html

1911

*

1912

* @param alpha The scalar alpha.

1913

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1914

* @param incX The increment for the elements of vector x, must be larger than zero.

1915

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1916

* @param incY The increment for the elements of vector y, must be larger than zero.

1917

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1918

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1919

public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1920

validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);

1921

int M = A.getType().getY();

1922

int N = A.getType().getX();

1923

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

1924

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1925

1926

/**

1927

* CGERC performs the rank 1 operation

1928

* A := alpha*x*y**H + A

1929

*

1930

* Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html

1931

*

1932

* @param alpha The scalar alpha.

1933

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1934

* @param incX The increment for the elements of vector x, must be larger than zero.

1935

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1936

* @param incY The increment for the elements of vector y, must be larger than zero.

1937

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1938

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1939

public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1940

// same as GERU

1941

validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);

1942

int M = A.getType().getY();

1943

int N = A.getType().getX();

1944

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

1945

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1946

1947

/**

1948

* CHER performs the rank 1 operation

1949

* A := alpha*x*x**H + A

1950

*

1951

* Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html

1952

*

1953

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1954

* @param alpha The scalar alpha.

1955

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1956

* @param incX The increment for the elements of vector x, must be larger than zero.

1957

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1958

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1959

public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1960

// same as SYR

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1961

int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1962

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);

1963

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1964

1965

/**

1966

* CHPR performs the rank 1 operation

1967

* A := alpha*x*x**H + A

1968

*

1969

* Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html

1970

*

1971

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1972

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1973

* 'a' to packed matrix 'b'.

1974

* k = 0

1975

* for i in range(0, n):

1976

* for j in range(i, n):

1977

* b[k++] = a[i, j]

1978

*

1979

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1980

* @param alpha The scalar alpha.

1981

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1982

* @param incX The increment for the elements of vector x, must be larger than zero.

1983

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1984

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1985

public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1986

// equivalent to SPR for validation

1987

int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);

1988

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);

1989

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1990

1991

/**

1992

* CHER2 performs the symmetric rank 2 operation

1993

* A := alpha*x*y**H + alpha*y*x**H + A

1994

*

1995

* Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html

1996

*

1997

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1998

* @param alpha The scalar alpha.

1999

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

2000

* @param incX The increment for the elements of vector x, must be larger than zero.

2001

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

2002

* @param incY The increment for the elements of vector y, must be larger than zero.

2003

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2004

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2005

public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2006

// same as SYR2

2007

int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);

2008

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2009

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2010

2011

/**

2012

* CHPR2 performs the symmetric rank 2 operation

2013

* A := alpha*x*y**H + alpha*y*x**H + A

2014

*

2015

* Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html

2016

*

2017

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2018

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

2019

* 'a' to packed matrix 'b'.

2020

* k = 0

2021

* for i in range(0, n):

2022

* for j in range(i, n):

2023

* b[k++] = a[i, j]

2024

*

2025

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

2026

* @param alpha The scalar alpha.

2027

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

2028

* @param incX The increment for the elements of vector x, must be larger than zero.

2029

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

2030

* @param incY The increment for the elements of vector y, must be larger than zero.

2031

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2032

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2033

public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2034

// same as SPR2

2035

int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);

2036

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);

2037

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2038

2039

/**

2040

* ZHEMV performs the matrix-vector operation

2041

* y := alpha*A*x + beta*y

2042

*

2043

* Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html

2044

*

2045

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2046

* @param alpha The scalar alpha.

2047

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2048

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2049

* @param incX The increment for the elements of vector x, must be larger than zero.

2050

* @param beta The scalar beta.

2051

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2052

* @param incY The increment for the elements of vector y, must be larger than zero.

2053

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2054

public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2055

// HEMV is the same as SYR2 validation-wise

2056

int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);

2057

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

2058

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2059

2060

/**

2061

* ZHBMV performs the matrix-vector operation

2062

* y := alpha*A*x + beta*y

2063

*

2064

* Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html

2065

*

2066

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

2067

* but only the region N*(K+1) will be referenced. The following subroutine can is an

2068

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

2069

* for i in range(0, n):

2070

* for j in range(i, min(i+k+1, n)):

2071

* b[i, j-i] = a[i, j]

2072

*

2073

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

2074

* @param K The number of off-diagonals of the matrix A

2075

* @param alpha The scalar alpha.

2076

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2077

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2078

* @param incX The increment for the elements of vector x, must be larger than zero.

2079

* @param beta The scalar beta.

2080

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2081

* @param incY The increment for the elements of vector y, must be larger than zero.

2082

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2083

public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2084

// HBMV is the same as SYR2 validation-wise

2085

int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);

2086

if (K < 0) {

2087

throw new RSRuntimeException("K must be 0 or greater for HBMV");

2088

}

2089

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

2090

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2091

2092

/**

2093

* ZHPMV performs the matrix-vector operation

2094

* y := alpha*A*x + beta*y

2095

*

2096

* Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html

2097

*

2098

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2099

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

2100

* 'a' to packed matrix 'b'.

2101

* k = 0

2102

* for i in range(0, n):

2103

* for j in range(i, n):

2104

* b[k++] = a[i, j]

2105

*

2106

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

2107

* @param alpha The scalar alpha.

2108

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2109

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2110

* @param incX The increment for the elements of vector x, must be larger than zero.

2111

* @param beta The scalar beta.

2112

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2113

* @param incY The increment for the elements of vector y, must be larger than zero.

2114

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2115

public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2116

// HPMV is the same as SPR2

2117

int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);

2118

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

2119

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2120

2121

/**

2122

* ZGERU performs the rank 1 operation

2123

* A := alpha*x*y**T + A

2124

*

2125

* Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html

2126

*

2127

* @param alpha The scalar alpha.

2128

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2129

* @param incX The increment for the elements of vector x, must be larger than zero.

2130

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2131

* @param incY The increment for the elements of vector y, must be larger than zero.

2132

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2133

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2134

public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2135

validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);

2136

int M = A.getType().getY();

2137

int N = A.getType().getX();

2138

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2139

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2140

2141

/**

2142

* ZGERC performs the rank 1 operation

2143

* A := alpha*x*y**H + A

2144

*

2145

* Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html

2146

*

2147

* @param alpha The scalar alpha.

2148

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2149

* @param incX The increment for the elements of vector x, must be larger than zero.

2150

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2151

* @param incY The increment for the elements of vector y, must be larger than zero.

2152

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2153

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2154

public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2155

// same as GERU

2156

validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);

2157

int M = A.getType().getY();

2158

int N = A.getType().getX();

2159

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2160

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2161

2162

/**

2163

* ZHER performs the rank 1 operation

2164

* A := alpha*x*x**H + A

2165

*

2166

* Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html

2167

*

2168

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2169

* @param alpha The scalar alpha.

2170

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2171

* @param incX The increment for the elements of vector x, must be larger than zero.

2172

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2173

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2174

public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2175

// same as SYR

Miao Wang

cecc00a

2015-04-29 18:14:55 -0700

[diff] [blame]

2176

int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2177

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);

2178

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2179

2180

/**

2181

* ZHPR performs the rank 1 operation

2182

* A := alpha*x*x**H + A

2183

*

2184

* Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html

2185

*

2186

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2187

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

2188

* 'a' to packed matrix 'b'.

2189

* k = 0

2190

* for i in range(0, n):

2191

* for j in range(i, n):

2192

* b[k++] = a[i, j]

2193

*

2194

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

2195

* @param alpha The scalar alpha.

2196

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2197

* @param incX The increment for the elements of vector x, must be larger than zero.

2198

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2199

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2200

public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2201

// equivalent to SPR for validation

2202

int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);

2203

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);

2204

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2205

2206

/**

2207

* ZHER2 performs the symmetric rank 2 operation

2208

* A := alpha*x*y**H + alpha*y*x**H + A

2209

*

2210

* Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html

2211

*

2212

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2213

* @param alpha The scalar alpha.

2214

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2215

* @param incX The increment for the elements of vector x, must be larger than zero.

2216

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2217

* @param incY The increment for the elements of vector y, must be larger than zero.

2218

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2219

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2220

public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2221

// same as SYR2

2222

int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);

2223

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2224

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2225

2226

/**

2227

* ZHPR2 performs the symmetric rank 2 operation

2228

* A := alpha*x*y**H + alpha*y*x**H + A

2229

*

2230

* Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html

2231

*

2232

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2233

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

2234

* 'a' to packed matrix 'b'.

2235

* k = 0

2236

* for i in range(0, n):

2237

* for j in range(i, n):

2238

* b[k++] = a[i, j]

2239

*

2240

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

2241

* @param alpha The scalar alpha.

2242

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2243

* @param incX The increment for the elements of vector x, must be larger than zero.

2244

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2245

* @param incY The increment for the elements of vector y, must be larger than zero.

2246

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2247

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2248

public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2249

// same as SPR2

2250

int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);

2251

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);

}

/**

* Level 3 BLAS

*/

static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2260

int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2261

if ((A != null && !A.getType().getElement().isCompatible(e)) ||

2262

(B != null && !B.getType().getElement().isCompatible(e)) ||

2263

(C != null && !C.getType().getElement().isCompatible(e))) {

2264

throw new RSRuntimeException("Called BLAS with wrong Element type");

2265

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2266

if (C == null) {

2267

//since matrix C is used to store the result, it cannot be null.

2268

throw new RSRuntimeException("Allocation C cannot be null");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2269

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2270

cM = C.getType().getY();

2271

cN = C.getType().getX();

2272

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2273

if (Side == RIGHT) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2274

if ((A == null && B != null) || (A != null && B == null)) {

2275

throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");

2276

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2277

if (B != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2278

bM = A.getType().getY();

2279

bN = A.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2280

}

2281

if (A != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2282

aM = B.getType().getY();

2283

aN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2284

}

2285

} else {

2286

if (A != null) {

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2287

if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2288

aN = A.getType().getY();

2289

aM = A.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2290

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2291

aM = A.getType().getY();

2292

aN = A.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2293

}

2294

}

2295

if (B != null) {

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2296

if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2297

bN = B.getType().getY();

2298

bM = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2299

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2300

bM = B.getType().getY();

2301

bN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

}

}

if (A != null && B != null && C != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2306

if (aN != bM || aM != cM || bN != cN) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2307

throw new RSRuntimeException("Called BLAS with invalid dimensions");

2308

}

2309

} else if (A != null && C != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2310

// A and C only, for SYRK

2311

if (cM != cN) {

2312

throw new RSRuntimeException("Matrix C is not symmetric");

2313

}

Miao Wang

50a8ff1

2015-05-01 15:32:24 -0700

[diff] [blame]

2314

if (aM != cM) {

2315

throw new RSRuntimeException("Called BLAS with invalid dimensions");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2316

}

2317

} else if (A != null && B != null) {

2318

// A and B only

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2319

if (aN != bM) {

2320

throw new RSRuntimeException("Called BLAS with invalid dimensions");

2321

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2326

/**

2327

* SGEMM performs one of the matrix-matrix operations

2328

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T

2329

*

2330

* Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html

2331

*

2332

* @param TransA The type of transpose applied to matrix A.

2333

* @param TransB The type of transpose applied to matrix B.

2334

* @param alpha The scalar alpha.

2335

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2336

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2337

* @param beta The scalar beta.

2338

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2339

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2340

public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,

2341

Allocation B, float beta, Allocation C) {

2342

validateTranspose(TransA);

2343

validateTranspose(TransB);

2344

validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);

2345

2346

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2347

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2348

M = A.getType().getX();

2349

K = A.getType().getY();

2350

} else {

2351

M = A.getType().getY();

2352

K = A.getType().getX();

2353

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2354

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2355

N = B.getType().getY();

2356

} else {

2357

N = B.getType().getX();

2358

}

2359

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),

2360

beta, C.getID(mRS), 0, 0, 0, 0);

2361

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2362

2363

/**

2364

* DGEMM performs one of the matrix-matrix operations

2365

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T

2366

*

2367

* Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html

2368

*

2369

* @param TransA The type of transpose applied to matrix A.

2370

* @param TransB The type of transpose applied to matrix B.

2371

* @param alpha The scalar alpha.

2372

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2373

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2374

* @param beta The scalar beta.

2375

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2376

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2377

public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,

2378

Allocation B, double beta, Allocation C) {

2379

validateTranspose(TransA);

2380

validateTranspose(TransB);

2381

validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);

2382

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2383

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2384

M = A.getType().getX();

2385

K = A.getType().getY();

2386

} else {

2387

M = A.getType().getY();

2388

K = A.getType().getX();

2389

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2390

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2391

N = B.getType().getY();

2392

} else {

2393

N = B.getType().getX();

2394

}

2395

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),

2396

beta, C.getID(mRS), 0, 0, 0, 0);

2397

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2398

2399

/**

2400

* CGEMM performs one of the matrix-matrix operations

2401

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H

2402

*

2403

* Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html

2404

*

2405

* @param TransA The type of transpose applied to matrix A.

2406

* @param TransB The type of transpose applied to matrix B.

2407

* @param alpha The scalar alpha.

2408

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2409

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2410

* @param beta The scalar beta.

2411

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2412

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2413

public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,

2414

Allocation B, Float2 beta, Allocation C) {

2415

validateTranspose(TransA);

2416

validateTranspose(TransB);

2417

validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);

2418

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2419

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2420

M = A.getType().getX();

2421

K = A.getType().getY();

2422

} else {

2423

M = A.getType().getY();

2424

K = A.getType().getX();

2425

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2426

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2427

N = B.getType().getY();

2428

} else {

2429

N = B.getType().getX();

2430

}

2431

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2432

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2433

}

2434

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2435

/**

2436

* ZGEMM performs one of the matrix-matrix operations

2437

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H

2438

*

2439

* Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html

2440

*

2441

* @param TransA The type of transpose applied to matrix A.

2442

* @param TransB The type of transpose applied to matrix B.

2443

* @param alpha The scalar alpha.

Elliot Waite

54de7747

2017-01-11 15:30:35 -0800

[diff] [blame]

2444

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2445

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2446

* @param beta The scalar beta.

Elliot Waite

54de7747

2017-01-11 15:30:35 -0800

[diff] [blame]

2447

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2448

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2449

public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,

2450

Allocation B, Double2 beta, Allocation C) {

2451

validateTranspose(TransA);

2452

validateTranspose(TransB);

2453

validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);

2454

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2455

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2456

M = A.getType().getX();

2457

K = A.getType().getY();

2458

} else {

2459

M = A.getType().getY();

2460

K = A.getType().getX();

2461

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2462

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2463

N = B.getType().getY();

2464

} else {

2465

N = B.getType().getX();

2466

}

2467

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2468

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2469

}

2470

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2471

/**

2472

* SSYMM performs one of the matrix-matrix operations

2473

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2474

*

2475

* Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html

2476

*

2477

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2478

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2479

* @param alpha The scalar alpha.

2480

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2481

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2482

* @param beta The scalar beta.

2483

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2484

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2485

public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,

2486

Allocation B, float beta, Allocation C) {

2487

validateSide(Side);

2488

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2489

//For SYMM, Matrix A should be symmetric

2490

if (A.getType().getX() != A.getType().getY()) {

2491

throw new RSRuntimeException("Matrix A is not symmetric");

2492

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2493

validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);

2494

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),

2495

beta, C.getID(mRS), 0, 0, 0, 0);

2496

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2497

2498

/**

2499

* DSYMM performs one of the matrix-matrix operations

2500

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2501

*

2502

* Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html

2503

*

2504

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2505

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2506

* @param alpha The scalar alpha.

2507

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2508

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2509

* @param beta The scalar beta.

2510

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2511

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2512

public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,

2513

Allocation B, double beta, Allocation C) {

2514

validateSide(Side);

2515

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2516

if (A.getType().getX() != A.getType().getY()) {

2517

throw new RSRuntimeException("Matrix A is not symmetric");

2518

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2519

validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);

2520

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),

2521

beta, C.getID(mRS), 0, 0, 0, 0);

2522

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2523

2524

/**

2525

* CSYMM performs one of the matrix-matrix operations

2526

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2527

*

2528

* Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html

2529

*

2530

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2531

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2532

* @param alpha The scalar alpha.

2533

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2534

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2535

* @param beta The scalar beta.

2536

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2537

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2538

public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,

2539

Allocation B, Float2 beta, Allocation C) {

2540

validateSide(Side);

2541

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2542

if (A.getType().getX() != A.getType().getY()) {

2543

throw new RSRuntimeException("Matrix A is not symmetric");

2544

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2545

validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);

2546

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2547

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2548

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2549

2550

/**

2551

* ZSYMM performs one of the matrix-matrix operations

2552

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2553

*

2554

* Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html

2555

*

2556

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2557

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2558

* @param alpha The scalar alpha.

2559

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2560

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

2561

* @param beta The scalar beta.

2562

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

2563

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2564

public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,

2565

Allocation B, Double2 beta, Allocation C) {

2566

validateSide(Side);

2567

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2568

if (A.getType().getX() != A.getType().getY()) {

2569

throw new RSRuntimeException("Matrix A is not symmetric");

2570

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2571

validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);

2572

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2573

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2574

}

2575

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2576

/**

2577

* SSYRK performs one of the symmetric rank k operations

2578

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2579

*

2580

* Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html

2581

*

2582

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2583

* @param Trans The type of transpose applied to the operation.

2584

* @param alpha The scalar alpha.

2585

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2586

* @param beta The scalar beta.

2587

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2588

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2589

public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {

2590

validateTranspose(Trans);

2591

validateUplo(Uplo);

2592

validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);

2593

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2594

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2595

K = A.getType().getY();

2596

} else {

2597

K = A.getType().getX();

2598

}

2599

2600

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);

2601

}

2602

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2603

/**

2604

* DSYRK performs one of the symmetric rank k operations

2605

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2606

*

2607

* Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html

2608

*

2609

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2610

* @param Trans The type of transpose applied to the operation.

2611

* @param alpha The scalar alpha.

2612

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2613

* @param beta The scalar beta.

2614

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2615

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2616

public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {

2617

validateTranspose(Trans);

2618

validateUplo(Uplo);

2619

validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);

2620

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2621

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2622

K = A.getType().getY();

2623

} else {

2624

K = A.getType().getX();

2625

}

2626

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);

2627

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2628

2629

/**

2630

* CSYRK performs one of the symmetric rank k operations

2631

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2632

*

2633

* Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html

2634

*

2635

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2636

* @param Trans The type of transpose applied to the operation.

2637

* @param alpha The scalar alpha.

2638

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2639

* @param beta The scalar beta.

2640

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2641

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2642

public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2643

validateTranspose(Trans);

2644

validateUplo(Uplo);

2645

validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);

2646

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2647

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2648

K = A.getType().getY();

2649

} else {

2650

K = A.getType().getX();

2651

}

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2652

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2653

C.getID(mRS), 0, 0, 0, 0);

2654

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2655

2656

/**

2657

* ZSYRK performs one of the symmetric rank k operations

2658

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2659

*

2660

* Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html

2661

*

2662

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2663

* @param Trans The type of transpose applied to the operation.

2664

* @param alpha The scalar alpha.

2665

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2666

* @param beta The scalar beta.

2667

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

2668

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2669

public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2670

validateTranspose(Trans);

2671

validateUplo(Uplo);

2672

validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);

2673

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2674

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2675

K = A.getType().getY();

2676

} else {

2677

K = A.getType().getX();

2678

}

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2679

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2680

C.getID(mRS), 0, 0, 0, 0);

2681

}

2682

2683

static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {

2684

validateTranspose(Trans);

2685

if (!A.getType().getElement().isCompatible(e) ||

2686

!B.getType().getElement().isCompatible(e) ||

2687

!C.getType().getElement().isCompatible(e)) {

2688

throw new RSRuntimeException("Called BLAS with wrong Element type");

2689

}

2690

int Cdim = -1;

2691

// A is n x k if no transpose, k x n if transpose

2692

// C is n x n

2693

if (Trans == TRANSPOSE) {

2694

// check columns versus C

2695

Cdim = A.getType().getX();

2696

} else {

2697

// check rows versus C

2698

Cdim = A.getType().getY();

2699

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2700

if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2701

throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");

2702

}

2703

// A dims == B dims

2704

if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {

2705

throw new RSRuntimeException("Invalid A and B in SYR2K");

2706

}

2707

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2708

2709

/**

2710

* SSYR2K performs one of the symmetric rank 2k operations

2711

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2712

*

2713

* Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html

2714

*

2715

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2716

* @param Trans The type of transpose applied to the operation.

2717

* @param alpha The scalar alpha.

2718

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2719

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2720

* @param beta The scalar beta.

2721

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2722

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2723

public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {

2724

validateUplo(Uplo);

2725

validateSYR2K(Element.F32(mRS), Trans, A, B, C);

2726

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2727

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2728

K = A.getType().getY();

2729

} else {

2730

K = A.getType().getX();

2731

}

2732

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);

2733

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2734

2735

/**

2736

* DSYR2K performs one of the symmetric rank 2k operations

2737

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2738

*

2739

* Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html

2740

*

2741

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2742

* @param Trans The type of transpose applied to the operation.

2743

* @param alpha The scalar alpha.

2744

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2745

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2746

* @param beta The scalar beta.

2747

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2748

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2749

public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {

2750

validateUplo(Uplo);

2751

validateSYR2K(Element.F64(mRS), Trans, A, B, C);

2752

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2753

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2754

K = A.getType().getY();

2755

} else {

2756

K = A.getType().getX();

2757

}

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2758

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2759

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2760

2761

/**

2762

* CSYR2K performs one of the symmetric rank 2k operations

2763

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2764

*

2765

* Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html

2766

*

2767

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2768

* @param Trans The type of transpose applied to the operation.

2769

* @param alpha The scalar alpha.

2770

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2771

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2772

* @param beta The scalar beta.

2773

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2774

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2775

public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {

2776

validateUplo(Uplo);

2777

validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);

2778

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2779

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2780

K = A.getType().getY();

2781

} else {

2782

K = A.getType().getX();

2783

}

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2784

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2785

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2786

2787

/**

2788

* ZSYR2K performs one of the symmetric rank 2k operations

2789

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2790

*

2791

* Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html

2792

*

2793

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2794

* @param Trans The type of transpose applied to the operation.

2795

* @param alpha The scalar alpha.

2796

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2797

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

2798

* @param beta The scalar beta.

2799

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

2800

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2801

public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {

2802

validateUplo(Uplo);

2803

validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);

2804

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2805

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2806

K = A.getType().getY();

2807

} else {

2808

K = A.getType().getX();

2809

}

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2810

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2811

}

2812

2813

static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {

2814

validateSide(Side);

2815

validateTranspose(TransA);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2816

int aM = -1, aN = -1, bM = -1, bN = -1;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2817

if (!A.getType().getElement().isCompatible(e) ||

2818

!B.getType().getElement().isCompatible(e)) {

2819

throw new RSRuntimeException("Called BLAS with wrong Element type");

2820

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2821

2822

aM = A.getType().getY();

2823

aN = A.getType().getX();

2824

if (aM != aN) {

2825

throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2826

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2827

2828

bM = B.getType().getY();

2829

bN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2830

if (Side == LEFT) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2831

if (aN != bM) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2832

throw new RSRuntimeException("Called TRMM with invalid matrices");

2833

}

2834

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2835

if (bN != aM) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2836

throw new RSRuntimeException("Called TRMM with invalid matrices");

2837

}

2838

}

2839

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2840

2841

/**

2842

* STRMM performs one of the matrix-matrix operations

2843

* B := alpha*op(A)*B or B := alpha*B*op(A)

2844

* op(A) is one of op(A) = A or op(A) = A**T

2845

*

2846

* Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html

2847

*

2848

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2849

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2850

* @param TransA The type of transpose applied to matrix A.

2851

* @param Diag Specifies whether or not A is unit triangular.

2852

* @param alpha The scalar alpha.

2853

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2854

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2855

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2856

public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {

2857

validateUplo(Uplo);

2858

validateDiag(Diag);

2859

validateTRMM(Element.F32(mRS), Side, TransA, A, B);

2860

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

2861

alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);

2862

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2863

2864

/**

2865

* DTRMM performs one of the matrix-matrix operations

2866

* B := alpha*op(A)*B or B := alpha*B*op(A)

2867

* op(A) is one of op(A) = A or op(A) = A**T

2868

*

2869

* Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html

2870

*

2871

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2872

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2873

* @param TransA The type of transpose applied to matrix A.

2874

* @param Diag Specifies whether or not A is unit triangular.

2875

* @param alpha The scalar alpha.

2876

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2877

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2878

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2879

public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {

2880

validateUplo(Uplo);

2881

validateDiag(Diag);

2882

validateTRMM(Element.F64(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2883

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

2884

alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2885

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2886

2887

/**

2888

* CTRMM performs one of the matrix-matrix operations

2889

* B := alpha*op(A)*B or B := alpha*B*op(A)

2890

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

2891

*

2892

* Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html

2893

*

2894

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2895

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2896

* @param TransA The type of transpose applied to matrix A.

2897

* @param Diag Specifies whether or not A is unit triangular.

2898

* @param alpha The scalar alpha.

2899

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2900

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2901

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2902

public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {

2903

validateUplo(Uplo);

2904

validateDiag(Diag);

2905

validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2906

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2907

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

2908

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2909

2910

/**

2911

* ZTRMM performs one of the matrix-matrix operations

2912

* B := alpha*op(A)*B or B := alpha*B*op(A)

2913

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

2914

*

2915

* Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html

2916

*

2917

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2918

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2919

* @param TransA The type of transpose applied to matrix A.

2920

* @param Diag Specifies whether or not A is unit triangular.

2921

* @param alpha The scalar alpha.

2922

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2923

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

2924

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2925

public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {

2926

validateUplo(Uplo);

2927

validateDiag(Diag);

2928

validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2929

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2930

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

2931

}

2932

2933

static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2934

int adim = -1, bM = -1, bN = -1;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2935

validateSide(Side);

2936

validateTranspose(TransA);

2937

if (!A.getType().getElement().isCompatible(e) ||

2938

!B.getType().getElement().isCompatible(e)) {

2939

throw new RSRuntimeException("Called BLAS with wrong Element type");

2940

}

2941

adim = A.getType().getX();

2942

if (adim != A.getType().getY()) {

2943

// this may be unnecessary, the restriction could potentially be relaxed

2944

// A needs to contain at least that symmetric matrix but could theoretically be larger

2945

// for now we assume adapters are sufficient, will reevaluate in the future

2946

throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");

2947

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2948

bM = B.getType().getY();

2949

bN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2950

if (Side == LEFT) {

2951

// A is M*M

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2952

if (adim != bM) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2953

throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");

2954

}

2955

} else {

2956

// A is N*N

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2957

if (adim != bN) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2958

throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");

2959

}

2960

}

2961

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2962

2963

/**

2964

* STRSM solves one of the matrix equations

2965

* op(A)*X := alpha*B or X*op(A) := alpha*B

2966

* op(A) is one of op(A) = A or op(A) = A**T

2967

*

2968

* Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html

2969

*

2970

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2971

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2972

* @param TransA The type of transpose applied to matrix A.

2973

* @param Diag Specifies whether or not A is unit triangular.

2974

* @param alpha The scalar alpha.

2975

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2976

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2977

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2978

public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {

2979

validateUplo(Uplo);

2980

validateDiag(Diag);

2981

validateTRSM(Element.F32(mRS), Side, TransA, A, B);

2982

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

2983

alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);

2984

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2985

2986

/**

2987

* DTRSM solves one of the matrix equations

2988

* op(A)*X := alpha*B or X*op(A) := alpha*B

2989

* op(A) is one of op(A) = A or op(A) = A**T

2990

*

2991

* Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html

2992

*

2993

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2994

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2995

* @param TransA The type of transpose applied to matrix A.

2996

* @param Diag Specifies whether or not A is unit triangular.

2997

* @param alpha The scalar alpha.

2998

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2999

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

3000

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3001

public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {

3002

validateUplo(Uplo);

3003

validateDiag(Diag);

3004

validateTRSM(Element.F64(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

3005

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3006

alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);

3007

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3008

3009

/**

3010

* CTRSM solves one of the matrix equations

3011

* op(A)*X := alpha*B or X*op(A) := alpha*B

3012

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

3013

*

3014

* Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html

3015

*

3016

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

3017

* @param Uplo Specifies whether matrix A is upper or lower triangular.

3018

* @param TransA The type of transpose applied to matrix A.

3019

* @param Diag Specifies whether or not A is unit triangular.

3020

* @param alpha The scalar alpha.

3021

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3022

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

3023

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3024

public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {

3025

validateUplo(Uplo);

3026

validateDiag(Diag);

3027

validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

3028

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3029

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

3030

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3031

3032

/**

3033

* ZTRSM solves one of the matrix equations

3034

* op(A)*X := alpha*B or X*op(A) := alpha*B

3035

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

3036

*

3037

* Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html

3038

*

3039

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

3040

* @param Uplo Specifies whether matrix A is upper or lower triangular.

3041

* @param TransA The type of transpose applied to matrix A.

3042

* @param Diag Specifies whether or not A is unit triangular.

3043

* @param alpha The scalar alpha.

3044

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3045

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

3046

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3047

public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {

3048

validateUplo(Uplo);

3049

validateDiag(Diag);

3050

validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

3051

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3052

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

3053

}

3054

3055

static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {

3056

validateSide(Side);

3057

3058

if (!A.getType().getElement().isCompatible(e) ||

3059

!B.getType().getElement().isCompatible(e) ||

3060

!C.getType().getElement().isCompatible(e)) {

3061

throw new RSRuntimeException("Called BLAS with wrong Element type");

3062

}

3063

3064

// A must be square; can potentially be relaxed similar to TRSM

3065

int adim = A.getType().getX();

3066

if (adim != A.getType().getY()) {

3067

throw new RSRuntimeException("Called HEMM with non-square A");

3068

}

3069

if ((Side == LEFT && adim != B.getType().getY()) ||

3070

(Side == RIGHT && adim != B.getType().getX())) {

3071

throw new RSRuntimeException("Called HEMM with invalid B");

3072

}

3073

if (B.getType().getX() != C.getType().getX() ||

3074

B.getType().getY() != C.getType().getY()) {

3075

throw new RSRuntimeException("Called HEMM with mismatched B and C");

3076

}

3077

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3078

3079

/**

3080

* CHEMM performs one of the matrix-matrix operations

3081

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

3082

*

3083

* Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html

3084

*

3085

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

3086

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

3087

* @param alpha The scalar alpha.

3088

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3089

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

3090

* @param beta The scalar beta.

3091

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

3092

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3093

public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3094

validateUplo(Uplo);

3095

validateHEMM(Element.F32_2(mRS), Side, A, B, C);

3096

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3097

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3098

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3099

3100

/**

3101

* ZHEMM performs one of the matrix-matrix operations

3102

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

3103

*

3104

* Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html

3105

*

3106

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

3107

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

3108

* @param alpha The scalar alpha.

3109

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3110

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

3111

* @param beta The scalar beta.

3112

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

3113

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3114

public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3115

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3116

validateHEMM(Element.F64_2(mRS), Side, A, B, C);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3117

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3118

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3119

}

3120

3121

static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {

3122

if (!A.getType().getElement().isCompatible(e) ||

3123

!C.getType().getElement().isCompatible(e)) {

3124

throw new RSRuntimeException("Called BLAS with wrong Element type");

3125

}

3126

validateConjTranspose(Trans);

3127

int cdim = C.getType().getX();

3128

if (cdim != C.getType().getY()) {

3129

throw new RSRuntimeException("Called HERK with non-square C");

3130

}

3131

if (Trans == NO_TRANSPOSE) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3132

if (cdim != A.getType().getY()) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3133

throw new RSRuntimeException("Called HERK with invalid A");

3134

}

3135

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3136

if (cdim != A.getType().getX()) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3137

throw new RSRuntimeException("Called HERK with invalid A");

3138

}

3139

}

3140

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3141

3142

/**

3143

* CHERK performs one of the hermitian rank k operations

3144

* C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C

3145

*

3146

* Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html

3147

*

3148

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3149

* @param Trans The type of transpose applied to the operation.

3150

* @param alpha The scalar alpha.

3151

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3152

* @param beta The scalar beta.

3153

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

3154

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3155

public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {

3156

validateUplo(Uplo);

3157

validateHERK(Element.F32_2(mRS), Trans, A, C);

3158

int k = 0;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3159

if (Trans == CONJ_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3160

k = A.getType().getY();

3161

} else {

3162

k = A.getType().getX();

3163

}

3164

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,

3165

alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);

3166

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3167

3168

/**

3169

* ZHERK performs one of the hermitian rank k operations

3170

* C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C

3171

*

3172

* Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html

3173

*

3174

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3175

* @param Trans The type of transpose applied to the operation.

3176

* @param alpha The scalar alpha.

3177

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3178

* @param beta The scalar beta.

3179

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

3180

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3181

public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {

3182

validateUplo(Uplo);

3183

validateHERK(Element.F64_2(mRS), Trans, A, C);

3184

int k = 0;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3185

if (Trans == CONJ_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3186

k = A.getType().getY();

3187

} else {

3188

k = A.getType().getX();

3189

}

3190

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,

3191

alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);

3192

}

3193

3194

static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {

3195

if (!A.getType().getElement().isCompatible(e) ||

3196

!B.getType().getElement().isCompatible(e) ||

3197

!C.getType().getElement().isCompatible(e)) {

3198

throw new RSRuntimeException("Called BLAS with wrong Element type");

3199

}

3200

validateConjTranspose(Trans);

3201

int cdim = C.getType().getX();

3202

if (cdim != C.getType().getY()) {

3203

throw new RSRuntimeException("Called HER2K with non-square C");

3204

}

3205

if (Trans == NO_TRANSPOSE) {

3206

if (A.getType().getY() != cdim) {

3207

throw new RSRuntimeException("Called HER2K with invalid matrices");

3208

}

3209

} else {

3210

if (A.getType().getX() != cdim) {

3211

throw new RSRuntimeException("Called HER2K with invalid matrices");

3212

}

3213

}

3214

if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {

3215

throw new RSRuntimeException("Called HER2K with invalid A and B matrices");

3216

}

3217

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3218

3219

/**

3220

* CHER2K performs one of the hermitian rank 2k operations

3221

* C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C

3222

*

3223

* Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html

3224

*

3225

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3226

* @param Trans The type of transpose applied to the operation.

3227

* @param alpha The scalar alpha.

3228

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3229

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

3230

* @param beta The scalar beta.

3231

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

3232

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3233

public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {

3234

validateUplo(Uplo);

3235

validateHER2K(Element.F32_2(mRS), Trans, A, B, C);

3236

int k = 0;

3237

if (Trans == NO_TRANSPOSE) {

3238

k = A.getType().getX();

3239

} else {

3240

k = A.getType().getY();

3241

}

3242

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,

3243

A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);

3244

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3245

3246

/**

3247

* ZHER2K performs one of the hermitian rank 2k operations

3248

* C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C

3249

*

3250

* Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html

3251

*

3252

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3253

* @param Trans The type of transpose applied to the operation.

3254

* @param alpha The scalar alpha.

3255

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3256

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

3257

* @param beta The scalar beta.

3258

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

3259

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3260

public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {

3261

validateUplo(Uplo);

3262

validateHER2K(Element.F64_2(mRS), Trans, A, B, C);

3263

int k = 0;

3264

if (Trans == NO_TRANSPOSE) {

3265

k = A.getType().getX();

3266

} else {

3267

k = A.getType().getY();

3268

}

3269

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,

3270

A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);

}

Tim Murray

2015-04-01 11:07:16 -0700

[diff] [blame]

3274

/**

Miao Wang

d7d413a

2015-07-15 11:35:28 -0700

[diff] [blame]

3275

* 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3276

* Calculations are done in 1.10.21 fixed-point format for the final output,

3277

* just before there's a shift down to drop the fractional parts. The output

3278

* values are gated to 0 to 255 to fit in a byte, but the 10-bit format

3279

* gives some headroom to avoid wrapping around on small overflows.

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3280

*

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3281

* @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3282

* @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3283

* @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3284

* @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3285

* @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.

3286

* @param c_offset The offset for all values in matrix C.

3287

* @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.

Tim Murray

9cb16a2

2015-04-01 11:07:16 -0700

[diff] [blame]

3288

**/

3289

public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {

3290

validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);

3291

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3292

if (a_offset < 0 || a_offset > 255) {

3293

throw new RSRuntimeException("Invalid a_offset passed to BNNM");

3294

}

3295

if (b_offset < 0 || b_offset > 255) {

3296

throw new RSRuntimeException("Invalid b_offset passed to BNNM");

3297

}

Tim Murray

9cb16a2

2015-04-01 11:07:16 -0700

[diff] [blame]

3298

int M = -1, N = -1, K = -1;

3299

M = A.getType().getY();

3300

N = B.getType().getY();

3301

K = A.getType().getX();

3302

3303

3304

mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult);

3305

3306

}

Tim Murray