Blame - rs/java/android/renderscript/ScriptIntrinsicBLAS.java - android_frameworks_base

2015-05-12 18:22:20 -0700

[diff] [blame]

330

331

/**

332

* DGEMV performs one of the matrix-vector operations

333

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y

334

*

335

* Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html

336

*

337

* @param TransA The type of transpose applied to matrix A.

338

* @param alpha The scalar alpha.

339

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

340

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

341

* @param incX The increment for the elements of vector x, must be larger than zero.

342

* @param beta The scalar beta.

343

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

344

* @param incY The increment for the elements of vector y, must be larger than zero.

345

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

346

public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

347

validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);

348

int M = A.getType().getY();

349

int N = A.getType().getX();

350

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

351

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

352

353

/**

354

* CGEMV performs one of the matrix-vector operations

355

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

356

*

357

* Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html

358

*

359

* @param TransA The type of transpose applied to matrix A.

360

* @param alpha The scalar alpha.

361

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

362

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

363

* @param incX The increment for the elements of vector x, must be larger than zero.

364

* @param beta The scalar beta.

365

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

366

* @param incY The increment for the elements of vector y, must be larger than zero.

367

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

368

public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

369

validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);

370

int M = A.getType().getY();

371

int N = A.getType().getX();

372

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

373

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

374

375

/**

376

* ZGEMV performs one of the matrix-vector operations

377

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

378

*

379

* Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html

380

*

381

* @param TransA The type of transpose applied to matrix A.

382

* @param alpha The scalar alpha.

383

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

384

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

385

* @param incX The increment for the elements of vector x, must be larger than zero.

386

* @param beta The scalar beta.

387

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

388

* @param incY The increment for the elements of vector y, must be larger than zero.

389

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

390

public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

391

validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);

392

int M = A.getType().getY();

393

int N = A.getType().getX();

394

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

395

}

396

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

397

/**

398

* SGBMV performs one of the matrix-vector operations

399

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y

400

*

401

* Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html

402

*

403

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

404

* but only the region M*(KL+KU+1) will be referenced. The following subroutine is an

405

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

406

* for i in range(0, m):

407

* for j in range(max(0, i-kl), min(i+ku+1, n)):

408

* b[i, j-i+kl] = a[i, j]

409

*

410

* @param TransA The type of transpose applied to matrix A.

411

* @param KL The number of sub-diagonals of the matrix A.

412

* @param KU The number of super-diagonals of the matrix A.

413

* @param alpha The scalar alpha.

414

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.

415

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

416

* @param incX The increment for the elements of vector x, must be larger than zero.

417

* @param beta The scalar beta.

418

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

419

* @param incY The increment for the elements of vector y, must be larger than zero.

420

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

421

public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

422

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

423

validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);

424

if (KL < 0 || KU < 0) {

425

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

426

}

427

int M = A.getType().getY();

428

int N = A.getType().getX();

429

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);

430

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

431

432

/**

433

* DGBMV performs one of the matrix-vector operations

434

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y

435

*

436

* Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html

437

*

438

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

439

* but only the region M*(KL+KU+1) will be referenced. The following subroutine is an

440

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

441

* for i in range(0, m):

442

* for j in range(max(0, i-kl), min(i+ku+1, n)):

443

* b[i, j-i+kl] = a[i, j]

444

*

445

* @param TransA The type of transpose applied to matrix A.

446

* @param KL The number of sub-diagonals of the matrix A.

447

* @param KU The number of super-diagonals of the matrix A.

448

* @param alpha The scalar alpha.

449

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.

450

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

451

* @param incX The increment for the elements of vector x, must be larger than zero.

452

* @param beta The scalar beta.

453

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

454

* @param incY The increment for the elements of vector y, must be larger than zero.

455

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

456

public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

457

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

458

validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);

459

if (KL < 0 || KU < 0) {

460

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

461

}

462

int M = A.getType().getY();

463

int N = A.getType().getX();

464

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);

465

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

466

467

/**

468

* CGBMV performs one of the matrix-vector operations

469

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

470

*

471

* Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html

472

*

473

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

474

* but only the region M*(KL+KU+1) will be referenced. The following subroutine is an

475

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

476

* for i in range(0, m):

477

* for j in range(max(0, i-kl), min(i+ku+1, n)):

478

* b[i, j-i+kl] = a[i, j]

479

*

480

* @param TransA The type of transpose applied to matrix A.

481

* @param KL The number of sub-diagonals of the matrix A.

482

* @param KU The number of super-diagonals of the matrix A.

483

* @param alpha The scalar alpha.

484

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.

485

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

486

* @param incX The increment for the elements of vector x, must be larger than zero.

487

* @param beta The scalar beta.

488

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

489

* @param incY The increment for the elements of vector y, must be larger than zero.

490

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

491

public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

492

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

493

validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);

494

if (KL < 0 || KU < 0) {

495

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

496

}

497

int M = A.getType().getY();

498

int N = A.getType().getX();

499

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);

500

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

501

502

/**

503

* ZGBMV performs one of the matrix-vector operations

504

* y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y

505

*

506

* Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html

507

*

508

* Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),

509

* but only the region M*(KL+KU+1) will be referenced. The following subroutine is an

510

* example showing how to convert the original matrix 'a' to row-based band matrix 'b'.

511

* for i in range(0, m):

512

* for j in range(max(0, i-kl), min(i+ku+1, n)):

513

* b[i, j-i+kl] = a[i, j]

514

*

515

* @param TransA The type of transpose applied to matrix A.

516

* @param KL The number of sub-diagonals of the matrix A.

517

* @param KU The number of super-diagonals of the matrix A.

518

* @param alpha The scalar alpha.

519

* @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.

520

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

521

* @param incX The increment for the elements of vector x, must be larger than zero.

522

* @param beta The scalar beta.

523

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

524

* @param incY The increment for the elements of vector y, must be larger than zero.

525

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

526

public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

527

// GBMV has the same validation requirements as GEMV + KL and KU >= 0

528

validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);

529

if (KL < 0 || KU < 0) {

530

throw new RSRuntimeException("KL and KU must be greater than or equal to 0");

531

}

532

int M = A.getType().getY();

533

int N = A.getType().getX();

534

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);

535

}

536

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

537

static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

538

validateTranspose(TransA);

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

539

validateUplo(Uplo);

540

validateDiag(Diag);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

541

int N = A.getType().getY();

542

if (A.getType().getX() != N) {

543

throw new RSRuntimeException("A must be a square matrix for TRMV");

544

}

545

if (!A.getType().getElement().isCompatible(e) ||

546

!X.getType().getElement().isCompatible(e)) {

547

throw new RSRuntimeException("Called BLAS with wrong Element type");

548

}

549

if (X.getType().getY() > 1) {

550

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

}

if (incX <= 0) {

throw new RSRuntimeException("Vector increments must be greater than 0");

555

}

556

int expectedXDim = 1 + (N - 1) * incX;

557

if (X.getType().getX() != expectedXDim) {

558

throw new RSRuntimeException("Incorrect vector dimensions for TRMV");

}

}

static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

563

validateTranspose(TransA);

564

validateUplo(Uplo);

565

validateDiag(Diag);

566

if (!Ap.getType().getElement().isCompatible(e) ||

567

!X.getType().getElement().isCompatible(e)) {

568

throw new RSRuntimeException("Called BLAS with wrong Element type");

569

}

570

if (X.getType().getY() > 1) {

571

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

572

}

573

574

if (Ap.getType().getY() > 1) {

575

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

576

}

577

578

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

579

//is it really doing anything?

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

580

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

581

throw new RSRuntimeException("Invalid dimension for Ap");

582

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

583

if (incX <= 0) {

584

throw new RSRuntimeException("Vector increments must be greater than 0");

585

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

586

int expectedXDim = 1 + (N - 1) * incX;

587

if (X.getType().getX() != expectedXDim) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

588

throw new RSRuntimeException("Incorrect vector dimensions for TPMV");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

return N;

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

594

/**

595

* STRMV performs one of the matrix-vector operations

596

* x := A*x or x := A**T*x

597

*

598

* Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html

599

*

600

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

601

* @param TransA The type of transpose applied to matrix A.

602

* @param Diag Specifies whether or not A is unit triangular.

603

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

604

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

605

* @param incX The increment for the elements of vector x, must be larger than zero.

606

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

607

public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

608

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

609

int N = A.getType().getY();

610

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

611

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

612

613

/**

614

* DTRMV performs one of the matrix-vector operations

615

* x := A*x or x := A**T*x

616

*

617

* Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html

618

*

619

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

620

* @param TransA The type of transpose applied to matrix A.

621

* @param Diag Specifies whether or not A is unit triangular.

622

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

623

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

624

* @param incX The increment for the elements of vector x, must be larger than zero.

625

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

626

public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

627

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

628

int N = A.getType().getY();

629

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

630

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

631

632

/**

633

* CTRMV performs one of the matrix-vector operations

634

* x := A*x or x := A**T*x or x := A**H*x

635

*

636

* Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html

637

*

638

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

639

* @param TransA The type of transpose applied to matrix A.

640

* @param Diag Specifies whether or not A is unit triangular.

641

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

642

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

643

* @param incX The increment for the elements of vector x, must be larger than zero.

644

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

645

public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

646

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

647

int N = A.getType().getY();

648

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

649

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

650

651

/**

652

* ZTRMV performs one of the matrix-vector operations

653

* x := A*x or x := A**T*x or x := A**H*x

654

*

655

* Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html

656

*

657

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

658

* @param TransA The type of transpose applied to matrix A.

659

* @param Diag Specifies whether or not A is unit triangular.

660

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

661

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

662

* @param incX The increment for the elements of vector x, must be larger than zero.

663

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

664

public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

665

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

666

int N = A.getType().getY();

667

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

668

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

669

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

670

/**

671

* STBMV performs one of the matrix-vector operations

672

* x := A*x or x := A**T*x

673

*

674

* Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html

675

*

676

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

677

* but only the region N*(K+1) will be referenced. The following subroutine is an

678

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

679

* for i in range(0, n):

680

* for j in range(i, min(i+k+1, n)):

681

* b[i, j-i] = a[i, j]

682

*

683

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

684

* @param TransA The type of transpose applied to matrix A.

685

* @param Diag Specifies whether or not A is unit triangular.

686

* @param K The number of off-diagonals of the matrix A

687

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

688

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

689

* @param incX The increment for the elements of vector x, must be larger than zero.

690

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

691

public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

692

// TBMV has the same requirements as TRMV + K >= 0

693

if (K < 0) {

694

throw new RSRuntimeException("K must be greater than or equal to 0");

695

}

696

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

697

int N = A.getType().getY();

698

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

699

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

700

701

/**

702

* DTBMV performs one of the matrix-vector operations

703

* x := A*x or x := A**T*x

704

*

705

* Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html

706

*

707

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

708

* but only the region N*(K+1) will be referenced. The following subroutine is an

709

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

710

* for i in range(0, n):

711

* for j in range(i, min(i+k+1, n)):

712

* b[i, j-i] = a[i, j]

713

*

714

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

715

* @param TransA The type of transpose applied to matrix A.

716

* @param Diag Specifies whether or not A is unit triangular.

717

* @param K The number of off-diagonals of the matrix A

718

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

719

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

720

* @param incX The increment for the elements of vector x, must be larger than zero.

721

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

722

public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

723

// TBMV has the same requirements as TRMV + K >= 0

724

if (K < 0) {

725

throw new RSRuntimeException("K must be greater than or equal to 0");

726

}

727

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

728

int N = A.getType().getY();

729

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

730

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

731

732

/**

733

* CTBMV performs one of the matrix-vector operations

734

* x := A*x or x := A**T*x or x := A**H*x

735

*

736

* Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html

737

*

738

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

739

* but only the region N*(K+1) will be referenced. The following subroutine is an

740

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

741

* for i in range(0, n):

742

* for j in range(i, min(i+k+1, n)):

743

* b[i, j-i] = a[i, j]

744

*

745

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

746

* @param TransA The type of transpose applied to matrix A.

747

* @param Diag Specifies whether or not A is unit triangular.

748

* @param K The number of off-diagonals of the matrix A

749

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

750

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

751

* @param incX The increment for the elements of vector x, must be larger than zero.

752

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

753

public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

754

// TBMV has the same requirements as TRMV + K >= 0

755

if (K < 0) {

756

throw new RSRuntimeException("K must be greater than or equal to 0");

757

}

758

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

759

int N = A.getType().getY();

760

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

761

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

762

763

/**

764

* ZTBMV performs one of the matrix-vector operations

765

* x := A*x or x := A**T*x or x := A**H*x

766

*

767

* Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html

768

*

769

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

770

* but only the region N*(K+1) will be referenced. The following subroutine is an

771

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

772

* for i in range(0, n):

773

* for j in range(i, min(i+k+1, n)):

774

* b[i, j-i] = a[i, j]

775

*

776

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

777

* @param TransA The type of transpose applied to matrix A.

778

* @param Diag Specifies whether or not A is unit triangular.

779

* @param K The number of off-diagonals of the matrix A

780

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

781

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

782

* @param incX The increment for the elements of vector x, must be larger than zero.

783

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

784

public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

785

// TBMV has the same requirements as TRMV + K >= 0

786

if (K < 0) {

787

throw new RSRuntimeException("K must be greater than or equal to 0");

788

}

789

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

790

int N = A.getType().getY();

791

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

792

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

793

794

/**

795

* STPMV performs one of the matrix-vector operations

796

* x := A*x or x := A**T*x

797

*

798

* Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html

799

*

800

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

801

* The following subroutine is an example showing how to convert an UPPER triangular matrix

802

* 'a' to packed matrix 'b'.

803

* k = 0

804

* for i in range(0, n):

805

* for j in range(i, n):

806

* b[k++] = a[i, j]

807

*

808

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

809

* @param TransA The type of transpose applied to matrix A.

810

* @param Diag Specifies whether or not A is unit triangular.

811

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.

812

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

813

* @param incX The increment for the elements of vector x, must be larger than zero.

814

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

815

public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

816

int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);

817

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

818

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

819

820

/**

821

* DTPMV performs one of the matrix-vector operations

822

* x := A*x or x := A**T*x

823

*

824

* Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html

825

*

826

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

827

* The following subroutine is an example showing how to convert an UPPER triangular matrix

828

* 'a' to packed matrix 'b'.

829

* k = 0

830

* for i in range(0, n):

831

* for j in range(i, n):

832

* b[k++] = a[i, j]

833

*

834

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

835

* @param TransA The type of transpose applied to matrix A.

836

* @param Diag Specifies whether or not A is unit triangular.

837

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.

838

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

839

* @param incX The increment for the elements of vector x, must be larger than zero.

840

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

841

public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

842

int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);

843

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

844

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

845

846

/**

847

* CTPMV performs one of the matrix-vector operations

848

* x := A*x or x := A**T*x or x := A**H*x

849

*

850

* Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html

851

*

852

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

853

* The following subroutine is an example showing how to convert an UPPER triangular matrix

854

* 'a' to packed matrix 'b'.

855

* k = 0

856

* for i in range(0, n):

857

* for j in range(i, n):

858

* b[k++] = a[i, j]

859

*

860

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

861

* @param TransA The type of transpose applied to matrix A.

862

* @param Diag Specifies whether or not A is unit triangular.

863

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.

864

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

865

* @param incX The increment for the elements of vector x, must be larger than zero.

866

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

867

public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

868

int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

869

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

870

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

871

872

/**

873

* ZTPMV performs one of the matrix-vector operations

874

* x := A*x or x := A**T*x or x := A**H*x

875

*

876

* Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html

877

*

878

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

879

* The following subroutine is an example showing how to convert an UPPER triangular matrix

880

* 'a' to packed matrix 'b'.

881

* k = 0

882

* for i in range(0, n):

883

* for j in range(i, n):

884

* b[k++] = a[i, j]

885

*

886

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

887

* @param TransA The type of transpose applied to matrix A.

888

* @param Diag Specifies whether or not A is unit triangular.

889

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.

890

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

891

* @param incX The increment for the elements of vector x, must be larger than zero.

892

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

893

public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

894

int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

895

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

896

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

897

898

/**

899

* STRSV solves one of the systems of equations

900

* A*x = b or A**T*x = b

901

*

902

* Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html

903

*

904

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

905

* @param TransA The type of transpose applied to matrix A.

906

* @param Diag Specifies whether or not A is unit triangular.

907

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

908

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

909

* @param incX The increment for the elements of vector x, must be larger than zero.

910

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

911

public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

912

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

913

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

914

int N = A.getType().getY();

915

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

916

917

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

918

919

/**

920

* DTRSV solves one of the systems of equations

921

* A*x = b or A**T*x = b

922

*

923

* Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html

924

*

925

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

926

* @param TransA The type of transpose applied to matrix A.

927

* @param Diag Specifies whether or not A is unit triangular.

928

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

929

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

930

* @param incX The increment for the elements of vector x, must be larger than zero.

931

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

932

public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

933

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

934

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

935

int N = A.getType().getY();

936

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

937

938

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

939

940

/**

941

* CTRSV solves one of the systems of equations

942

* A*x = b or A**T*x = b or A**H*x = b

943

*

944

* Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html

945

*

946

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

947

* @param TransA The type of transpose applied to matrix A.

948

* @param Diag Specifies whether or not A is unit triangular.

949

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

950

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

951

* @param incX The increment for the elements of vector x, must be larger than zero.

952

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

953

public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

954

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

955

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

956

int N = A.getType().getY();

957

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

958

959

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

960

961

/**

962

* ZTRSV solves one of the systems of equations

963

* A*x = b or A**T*x = b or A**H*x = b

964

*

965

* Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html

966

*

967

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

968

* @param TransA The type of transpose applied to matrix A.

969

* @param Diag Specifies whether or not A is unit triangular.

970

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

971

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

972

* @param incX The increment for the elements of vector x, must be larger than zero.

973

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

974

public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

975

// TRSV is the same as TRMV

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

976

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

977

int N = A.getType().getY();

978

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

979

980

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

981

982

/**

983

* STBSV solves one of the systems of equations

984

* A*x = b or A**T*x = b

985

*

986

* Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html

987

*

988

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

989

* but only the region N*(K+1) will be referenced. The following subroutine is an

990

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

991

* for i in range(0, n):

992

* for j in range(i, min(i+k+1, n)):

993

* b[i, j-i] = a[i, j]

994

*

995

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

996

* @param TransA The type of transpose applied to matrix A.

997

* @param Diag Specifies whether or not A is unit triangular.

998

* @param K The number of off-diagonals of the matrix A

999

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1000

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1001

* @param incX The increment for the elements of vector x, must be larger than zero.

1002

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1003

public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1004

// TBSV is the same as TRMV + K >= 0

1005

validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1006

int N = A.getType().getY();

1007

if (K < 0) {

1008

throw new RSRuntimeException("Number of diagonals must be positive");

1009

}

1010

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1011

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1012

1013

/**

1014

* DTBSV solves one of the systems of equations

1015

* A*x = b or A**T*x = b

1016

*

1017

* Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html

1018

*

1019

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1020

* but only the region N*(K+1) will be referenced. The following subroutine is an

1021

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

1022

* for i in range(0, n):

1023

* for j in range(i, min(i+k+1, n)):

1024

* b[i, j-i] = a[i, j]

1025

*

1026

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1027

* @param TransA The type of transpose applied to matrix A.

1028

* @param Diag Specifies whether or not A is unit triangular.

1029

* @param K The number of off-diagonals of the matrix A

1030

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1031

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1032

* @param incX The increment for the elements of vector x, must be larger than zero.

1033

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1034

public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1035

// TBSV is the same as TRMV + K >= 0

1036

validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1037

int N = A.getType().getY();

1038

if (K < 0) {

1039

throw new RSRuntimeException("Number of diagonals must be positive");

1040

}

1041

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1042

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1043

1044

/**

1045

* CTBSV solves one of the systems of equations

1046

* A*x = b or A**T*x = b or A**H*x = b

1047

*

1048

* Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html

1049

*

1050

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1051

* but only the region N*(K+1) will be referenced. The following subroutine is an

1052

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

1053

* for i in range(0, n):

1054

* for j in range(i, min(i+k+1, n)):

1055

* b[i, j-i] = a[i, j]

1056

*

1057

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1058

* @param TransA The type of transpose applied to matrix A.

1059

* @param Diag Specifies whether or not A is unit triangular.

1060

* @param K The number of off-diagonals of the matrix A

1061

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1062

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1063

* @param incX The increment for the elements of vector x, must be larger than zero.

1064

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1065

public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1066

// TBSV is the same as TRMV + K >= 0

1067

validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1068

int N = A.getType().getY();

1069

if (K < 0) {

1070

throw new RSRuntimeException("Number of diagonals must be positive");

1071

}

1072

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

1073

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1074

1075

/**

1076

* ZTBSV solves one of the systems of equations

1077

* A*x = b or A**T*x = b or A**H*x = b

1078

*

1079

* Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html

1080

*

1081

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1082

* but only the region N*(K+1) will be referenced. The following subroutine is an

1083

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

1084

* for i in range(0, n):

1085

* for j in range(i, min(i+k+1, n)):

1086

* b[i, j-i] = a[i, j]

1087

*

1088

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1089

* @param TransA The type of transpose applied to matrix A.

1090

* @param Diag Specifies whether or not A is unit triangular.

1091

* @param K The number of off-diagonals of the matrix A

1092

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

1093

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

1094

* @param incX The increment for the elements of vector x, must be larger than zero.

1095

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1096

public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1097

// TBSV is the same as TRMV + K >= 0

1098

validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1099

int N = A.getType().getY();

1100

if (K < 0) {

1101

throw new RSRuntimeException("Number of diagonals must be positive");

1102

}

1103

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

1104

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1105

1106

/**

1107

* STPSV solves one of the systems of equations

1108

* A*x = b or A**T*x = b

1109

*

1110

* Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html

1111

*

1112

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1113

* The following subroutine is an example showing how to convert an UPPER triangular matrix

1114

* 'a' to packed matrix 'b'.

1115

* k = 0

1116

* for i in range(0, n):

1117

* for j in range(i, n):

1118

* b[k++] = a[i, j]

1119

*

1120

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1121

* @param TransA The type of transpose applied to matrix A.

1122

* @param Diag Specifies whether or not A is unit triangular.

1123

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.

1124

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1125

* @param incX The increment for the elements of vector x, must be larger than zero.

1126

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1127

public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1128

// TPSV is same as TPMV

1129

int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);

1130

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1131

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1132

1133

/**

1134

* DTPSV solves one of the systems of equations

1135

* A*x = b or A**T*x = b

1136

*

1137

* Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html

1138

*

1139

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1140

* The following subroutine is an example showing how to convert an UPPER triangular matrix

1141

* 'a' to packed matrix 'b'.

1142

* k = 0

1143

* for i in range(0, n):

1144

* for j in range(i, n):

1145

* b[k++] = a[i, j]

1146

*

1147

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1148

* @param TransA The type of transpose applied to matrix A.

1149

* @param Diag Specifies whether or not A is unit triangular.

1150

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.

1151

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1152

* @param incX The increment for the elements of vector x, must be larger than zero.

1153

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1154

public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1155

// TPSV is same as TPMV

1156

int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);

1157

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);

1158

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1159

1160

/**

1161

* CTPSV solves one of the systems of equations

1162

* A*x = b or A**T*x = b or A**H*x = b

1163

*

1164

* Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html

1165

*

1166

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1167

* The following subroutine is an example showing how to convert an UPPER triangular matrix

1168

* 'a' to packed matrix 'b'.

1169

* k = 0

1170

* for i in range(0, n):

1171

* for j in range(i, n):

1172

* b[k++] = a[i, j]

1173

*

1174

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1175

* @param TransA The type of transpose applied to matrix A.

1176

* @param Diag Specifies whether or not A is unit triangular.

1177

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.

1178

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1179

* @param incX The increment for the elements of vector x, must be larger than zero.

1180

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1181

public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1182

// TPSV is same as TPMV

1183

int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

1184

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

1185

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1186

1187

/**

1188

* ZTPSV solves one of the systems of equations

1189

* A*x = b or A**T*x = b or A**H*x = b

1190

*

1191

* Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html

1192

*

1193

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1194

* The following subroutine is an example showing how to convert an UPPER triangular matrix

1195

* 'a' to packed matrix 'b'.

1196

* k = 0

1197

* for i in range(0, n):

1198

* for j in range(i, n):

1199

* b[k++] = a[i, j]

1200

*

1201

* @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.

1202

* @param TransA The type of transpose applied to matrix A.

1203

* @param Diag Specifies whether or not A is unit triangular.

1204

* @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.

1205

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

1206

* @param incX The increment for the elements of vector x, must be larger than zero.

1207

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1208

public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1209

// TPSV is same as TPMV

1210

int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);

1211

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);

}

/**

* Level 2, S and D only

1216

*/

1217

static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {

1218

validateUplo(Uplo);

1219

int N = A.getType().getY();

1220

if (A.getType().getX() != N) {

1221

throw new RSRuntimeException("A must be a square matrix for SYMV");

1222

}

1223

if (!A.getType().getElement().isCompatible(e) ||

1224

!X.getType().getElement().isCompatible(e) ||

1225

!Y.getType().getElement().isCompatible(e) ) {

1226

throw new RSRuntimeException("Called BLAS with wrong Element type");

1227

}

1228

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1229

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1230

}

1231

1232

if (incX <= 0 || incY <= 0) {

1233

throw new RSRuntimeException("Vector increments must be greater than 0");

1234

}

1235

int expectedXDim = 1 + (N - 1) * incX;

1236

if (X.getType().getX() != expectedXDim) {

1237

throw new RSRuntimeException("Incorrect vector dimensions for SYMV");

1238

}

1239

int expectedYDim = 1 + (N - 1) * incY;

1240

if (Y.getType().getX() != expectedYDim) {

1241

throw new RSRuntimeException("Incorrect vector dimensions for SYMV");

}

return N;

}

static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {

1246

validateUplo(Uplo);

1247

if (!Ap.getType().getElement().isCompatible(e) ||

1248

!X.getType().getElement().isCompatible(e) ||

1249

!Y.getType().getElement().isCompatible(e)) {

1250

throw new RSRuntimeException("Called BLAS with wrong Element type");

1251

}

1252

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1253

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1254

}

1255

1256

if (Ap.getType().getY() > 1) {

1257

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

1258

}

1259

1260

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

1261

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

1262

throw new RSRuntimeException("Invalid dimension for Ap");

1263

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1264

if (incX <= 0 || incY <= 0) {

1265

throw new RSRuntimeException("Vector increments must be greater than 0");

1266

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1267

int expectedXDim = 1 + (N - 1) * incX;

1268

if (X.getType().getX() != expectedXDim) {

1269

throw new RSRuntimeException("Incorrect vector dimensions for SPMV");

1270

}

1271

int expectedYDim = 1 + (N - 1) * incY;

1272

if (Y.getType().getX() != expectedYDim) {

1273

throw new RSRuntimeException("Incorrect vector dimensions for SPMV");

}

return N;

}

static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

1279

if (!A.getType().getElement().isCompatible(e) ||

1280

!X.getType().getElement().isCompatible(e) ||

1281

!Y.getType().getElement().isCompatible(e) ) {

1282

throw new RSRuntimeException("Called BLAS with wrong Element type");

1283

}

1284

1285

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1286

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1287

}

1288

1289

int M = A.getType().getY();

1290

int N = A.getType().getX();

1291

1292

if (N < 1 || M < 1) {

1293

throw new RSRuntimeException("M and N must be 1 or greater for GER");

1294

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1295

if (incX <= 0 || incY <= 0) {

1296

throw new RSRuntimeException("Vector increments must be greater than 0");

1297

}

1298

int expectedXDim = 1 + (M - 1) * incX;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1299

if (X.getType().getX() != expectedXDim) {

1300

throw new RSRuntimeException("Incorrect vector dimensions for GER");

1301

}

1302

int expectedYDim = 1 + (N - 1) * incY;

1303

if (Y.getType().getX() != expectedYDim) {

1304

throw new RSRuntimeException("Incorrect vector dimensions for GER");

}

}

static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {

1310

validateUplo(Uplo);

1311

if (!A.getType().getElement().isCompatible(e) ||

1312

!X.getType().getElement().isCompatible(e)) {

1313

throw new RSRuntimeException("Called BLAS with wrong Element type");

1314

}

1315

1316

int N = A.getType().getX();

1317

1318

if (X.getType().getY() > 1) {

1319

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1320

}

1321

if (N != A.getType().getY()) {

1322

throw new RSRuntimeException("A must be a symmetric matrix");

1323

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1324

if (incX <= 0) {

1325

throw new RSRuntimeException("Vector increments must be greater than 0");

1326

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1327

int expectedXDim = 1 + (N - 1) * incX;

1328

if (X.getType().getX() != expectedXDim) {

1329

throw new RSRuntimeException("Incorrect vector dimensions for SYR");

}

return N;

}

static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {

1334

validateUplo(Uplo);

1335

if (!Ap.getType().getElement().isCompatible(e) ||

1336

!X.getType().getElement().isCompatible(e)) {

1337

throw new RSRuntimeException("Called BLAS with wrong Element type");

1338

}

1339

if (X.getType().getY() > 1) {

1340

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1341

}

1342

1343

if (Ap.getType().getY() > 1) {

1344

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

1345

}

1346

1347

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

1348

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

1349

throw new RSRuntimeException("Invalid dimension for Ap");

1350

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1351

if (incX <= 0) {

1352

throw new RSRuntimeException("Vector increments must be greater than 0");

1353

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1354

int expectedXDim = 1 + (N - 1) * incX;

1355

if (X.getType().getX() != expectedXDim) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1356

throw new RSRuntimeException("Incorrect vector dimensions for SPR");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

return N;

}

static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

1363

validateUplo(Uplo);

1364

if (!A.getType().getElement().isCompatible(e) ||

1365

!X.getType().getElement().isCompatible(e) ||

1366

!Y.getType().getElement().isCompatible(e)) {

1367

throw new RSRuntimeException("Called BLAS with wrong Element type");

1368

}

1369

1370

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1371

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1372

}

1373

1374

int N = A.getType().getX();

1375

1376

if (N != A.getType().getY()) {

1377

throw new RSRuntimeException("A must be a symmetric matrix");

1378

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1379

if (incX <= 0 || incY <= 0) {

1380

throw new RSRuntimeException("Vector increments must be greater than 0");

1381

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1382

int expectedXDim = 1 + (N - 1) * incX;

1383

int expectedYDim = 1 + (N - 1) * incY;

1384

if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {

1385

throw new RSRuntimeException("Incorrect vector dimensions for SYR");

}

return N;

}

static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

1391

validateUplo(Uplo);

1392

if (!Ap.getType().getElement().isCompatible(e) ||

1393

!X.getType().getElement().isCompatible(e) ||

1394

!Y.getType().getElement().isCompatible(e)) {

1395

throw new RSRuntimeException("Called BLAS with wrong Element type");

1396

}

1397

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1398

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1399

}

1400

1401

if (Ap.getType().getY() > 1) {

1402

throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");

1403

}

1404

1405

int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);

1406

if (Ap.getType().getX() != ((N * (N+1)) / 2)) {

1407

throw new RSRuntimeException("Invalid dimension for Ap");

1408

}

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1409

if (incX <= 0 || incY <= 0) {

1410

throw new RSRuntimeException("Vector increments must be greater than 0");

1411

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1412

int expectedXDim = 1 + (N - 1) * incX;

1413

int expectedYDim = 1 + (N - 1) * incY;

1414

if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1415

throw new RSRuntimeException("Incorrect vector dimensions for SPR2");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

return N;

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1421

/**

1422

* SSYMV performs the matrix-vector operation

1423

* y := alpha*A*x + beta*y

1424

*

1425

* Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html

1426

*

1427

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1428

* @param alpha The scalar alpha.

1429

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1430

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1431

* @param incX The increment for the elements of vector x, must be larger than zero.

1432

* @param beta The scalar beta.

1433

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1434

* @param incY The increment for the elements of vector y, must be larger than zero.

1435

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1436

public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1437

int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);

1438

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1439

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1440

1441

/**

1442

* SSBMV performs the matrix-vector operation

1443

* y := alpha*A*x + beta*y

1444

*

1445

* Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html

1446

*

1447

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1448

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1449

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1450

* for i in range(0, n):

1451

* for j in range(i, min(i+k+1, n)):

1452

* b[i, j-i] = a[i, j]

1453

*

1454

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

1455

* @param K The number of off-diagonals of the matrix A

1456

* @param alpha The scalar alpha.

1457

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1458

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1459

* @param incX The increment for the elements of vector x, must be larger than zero.

1460

* @param beta The scalar beta.

1461

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1462

* @param incY The increment for the elements of vector y, must be larger than zero.

1463

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1464

public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1465

// SBMV is the same as SYMV + K >= 0

1466

if (K < 0) {

1467

throw new RSRuntimeException("K must be greater than or equal to 0");

1468

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1469

int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);

1470

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1471

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1472

1473

/**

1474

* SSPMV performs the matrix-vector operation

1475

* y := alpha*A*x + beta*y

1476

*

1477

* Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html

1478

*

1479

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1480

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1481

* 'a' to packed matrix 'b'.

1482

* k = 0

1483

* for i in range(0, n):

1484

* for j in range(i, n):

1485

* b[k++] = a[i, j]

1486

*

1487

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

1488

* @param alpha The scalar alpha.

1489

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.

1490

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1491

* @param incX The increment for the elements of vector x, must be larger than zero.

1492

* @param beta The scalar beta.

1493

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1494

* @param incY The increment for the elements of vector y, must be larger than zero.

1495

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1496

public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1497

int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);

1498

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1499

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1500

1501

/**

1502

* SGER performs the rank 1 operation

1503

* A := alpha*x*y**T + A

1504

*

1505

* Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html

1506

*

1507

* @param alpha The scalar alpha.

1508

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1509

* @param incX The increment for the elements of vector x, must be larger than zero.

1510

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1511

* @param incY The increment for the elements of vector y, must be larger than zero.

1512

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1513

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1514

public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1515

int M = A.getType().getY();

1516

int N = A.getType().getX();

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1517

validateGER(Element.F32(mRS), X, incX, Y, incY, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1518

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);

1519

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1520

1521

/**

1522

* SSYR performs the rank 1 operation

1523

* A := alpha*x*x**T + A

1524

*

1525

* Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html

1526

*

1527

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1528

* @param alpha The scalar alpha.

1529

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1530

* @param incX The increment for the elements of vector x, must be larger than zero.

1531

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1532

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1533

public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1534

int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);

1535

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1536

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1537

1538

/**

1539

* SSPR performs the rank 1 operation

1540

* A := alpha*x*x**T + A

1541

*

1542

* Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html

1543

*

1544

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1545

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1546

* 'a' to packed matrix 'b'.

1547

* k = 0

1548

* for i in range(0, n):

1549

* for j in range(i, n):

1550

* b[k++] = a[i, j]

1551

*

1552

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1553

* @param alpha The scalar alpha.

1554

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1555

* @param incX The increment for the elements of vector x, must be larger than zero.

1556

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.

1557

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1558

public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1559

int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);

1560

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1561

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1562

1563

/**

1564

* SSYR2 performs the symmetric rank 2 operation

1565

* A := alpha*x*y**T + alpha*y*x**T + A

1566

*

1567

* Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html

1568

*

1569

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1570

* @param alpha The scalar alpha.

1571

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1572

* @param incX The increment for the elements of vector x, must be larger than zero.

1573

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1574

* @param incY The increment for the elements of vector y, must be larger than zero.

1575

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

1576

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1577

public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1578

int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);

1579

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);

1580

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1581

1582

/**

1583

* SSPR2 performs the symmetric rank 2 operation

1584

* A := alpha*x*y**T + alpha*y*x**T + A

1585

*

1586

* Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html

1587

*

1588

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1589

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1590

* 'a' to packed matrix 'b'.

1591

* k = 0

1592

* for i in range(0, n):

1593

* for j in range(i, n):

1594

* b[k++] = a[i, j]

1595

*

1596

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1597

* @param alpha The scalar alpha.

1598

* @param X The input allocation contains vector x, supported elements type {@link Element#F32}.

1599

* @param incX The increment for the elements of vector x, must be larger than zero.

1600

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.

1601

* @param incY The increment for the elements of vector y, must be larger than zero.

1602

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.

1603

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1604

public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1605

int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);

1606

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);

1607

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1608

1609

/**

1610

* DSYMV performs the matrix-vector operation

1611

* y := alpha*A*x + beta*y

1612

*

1613

* Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html

1614

*

1615

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1616

* @param alpha The scalar alpha.

1617

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1618

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1619

* @param incX The increment for the elements of vector x, must be larger than zero.

1620

* @param beta The scalar beta.

1621

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1622

* @param incY The increment for the elements of vector y, must be larger than zero.

1623

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1624

public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1625

int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);

1626

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1627

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1628

1629

/**

1630

* DSBMV performs the matrix-vector operation

1631

* y := alpha*A*x + beta*y

1632

*

1633

* Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html

1634

*

1635

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1636

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1637

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1638

* for i in range(0, n):

1639

* for j in range(i, min(i+k+1, n)):

1640

* b[i, j-i] = a[i, j]

1641

*

1642

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

1643

* @param K The number of off-diagonals of the matrix A

1644

* @param alpha The scalar alpha.

1645

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1646

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1647

* @param incX The increment for the elements of vector x, must be larger than zero.

1648

* @param beta The scalar beta.

1649

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1650

* @param incY The increment for the elements of vector y, must be larger than zero.

1651

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1652

public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1653

// SBMV is the same as SYMV + K >= 0

1654

if (K < 0) {

1655

throw new RSRuntimeException("K must be greater than or equal to 0");

1656

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1657

int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);

1658

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1659

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1660

1661

/**

1662

* DSPMV performs the matrix-vector operation

1663

* y := alpha*A*x + beta*y

1664

*

1665

* Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html

1666

*

1667

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1668

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1669

* 'a' to packed matrix 'b'.

1670

* k = 0

1671

* for i in range(0, n):

1672

* for j in range(i, n):

1673

* b[k++] = a[i, j]

1674

*

1675

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

1676

* @param alpha The scalar alpha.

1677

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.

1678

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1679

* @param incX The increment for the elements of vector x, must be larger than zero.

1680

* @param beta The scalar beta.

1681

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1682

* @param incY The increment for the elements of vector y, must be larger than zero.

1683

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1684

public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1685

int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);

1686

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);

1687

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1688

1689

/**

1690

* DGER performs the rank 1 operation

1691

* A := alpha*x*y**T + A

1692

*

1693

* Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html

1694

*

1695

* @param alpha The scalar alpha.

1696

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1697

* @param incX The increment for the elements of vector x, must be larger than zero.

1698

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1699

* @param incY The increment for the elements of vector y, must be larger than zero.

1700

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1701

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1702

public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1703

int M = A.getType().getY();

1704

int N = A.getType().getX();

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1705

validateGER(Element.F64(mRS), X, incX, Y, incY, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1706

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);

1707

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1708

1709

/**

1710

* DSYR performs the rank 1 operation

1711

* A := alpha*x*x**T + A

1712

*

1713

* Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html

1714

*

1715

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1716

* @param alpha The scalar alpha.

1717

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1718

* @param incX The increment for the elements of vector x, must be larger than zero.

1719

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1720

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1721

public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1722

int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);

1723

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1724

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1725

1726

/**

1727

* DSPR performs the rank 1 operation

1728

* A := alpha*x*x**T + A

1729

*

1730

* Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html

1731

*

1732

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1733

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1734

* 'a' to packed matrix 'b'.

1735

* k = 0

1736

* for i in range(0, n):

1737

* for j in range(i, n):

1738

* b[k++] = a[i, j]

1739

*

1740

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1741

* @param alpha The scalar alpha.

1742

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1743

* @param incX The increment for the elements of vector x, must be larger than zero.

1744

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.

1745

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1746

public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1747

int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);

1748

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);

1749

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1750

1751

/**

1752

* DSYR2 performs the symmetric rank 2 operation

1753

* A := alpha*x*y**T + alpha*y*x**T + A

1754

*

1755

* Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html

1756

*

1757

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1758

* @param alpha The scalar alpha.

1759

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1760

* @param incX The increment for the elements of vector x, must be larger than zero.

1761

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1762

* @param incY The increment for the elements of vector y, must be larger than zero.

1763

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

1764

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1765

public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1766

int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);

1767

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);

1768

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1769

1770

/**

1771

* DSPR2 performs the symmetric rank 2 operation

1772

* A := alpha*x*y**T + alpha*y*x**T + A

1773

*

1774

* Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html

1775

*

1776

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1777

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1778

* 'a' to packed matrix 'b'.

1779

* k = 0

1780

* for i in range(0, n):

1781

* for j in range(i, n):

1782

* b[k++] = a[i, j]

1783

*

1784

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1785

* @param alpha The scalar alpha.

1786

* @param X The input allocation contains vector x, supported elements type {@link Element#F64}.

1787

* @param incX The increment for the elements of vector x, must be larger than zero.

1788

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.

1789

* @param incY The increment for the elements of vector y, must be larger than zero.

1790

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.

1791

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1792

public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1793

int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);

1794

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);

}

/**

* Level 2, C and Z only

1800

*/

1801

1802

static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

1803

if (!A.getType().getElement().isCompatible(e) ||

1804

!X.getType().getElement().isCompatible(e) ||

1805

!Y.getType().getElement().isCompatible(e)) {

1806

throw new RSRuntimeException("Called BLAS with wrong Element type");

1807

}

1808

if (X.getType().getY() > 1 || Y.getType().getY() > 1) {

1809

throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");

1810

}

1811

1812

int M = A.getType().getY();

1813

int N = A.getType().getX();

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1814

if (incX <= 0 || incY <= 0) {

1815

throw new RSRuntimeException("Vector increments must be greater than 0");

1816

}

1817

int expectedXDim = 1 + (M - 1) * incX;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1818

if (X.getType().getX() != expectedXDim) {

1819

throw new RSRuntimeException("Incorrect vector dimensions for GERU");

1820

}

1821

int expectedYDim = 1 + (N - 1) * incY;

1822

if (Y.getType().getX() != expectedYDim) {

1823

throw new RSRuntimeException("Incorrect vector dimensions for GERU");

}

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1828

/**

1829

* CHEMV performs the matrix-vector operation

1830

* y := alpha*A*x + beta*y

1831

*

1832

* Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html

1833

*

1834

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1835

* @param alpha The scalar alpha.

1836

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1837

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1838

* @param incX The increment for the elements of vector x, must be larger than zero.

1839

* @param beta The scalar beta.

1840

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1841

* @param incY The increment for the elements of vector y, must be larger than zero.

1842

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1843

public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1844

// HEMV is the same as SYR2 validation-wise

1845

int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);

1846

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

1847

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1848

1849

/**

1850

* CHBMV performs the matrix-vector operation

1851

* y := alpha*A*x + beta*y

1852

*

1853

* Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html

1854

*

1855

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

1856

* but only the region N*(K+1) will be referenced. The following subroutine can is an

1857

* example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.

1858

* for i in range(0, n):

1859

* for j in range(i, min(i+k+1, n)):

1860

* b[i, j-i] = a[i, j]

1861

*

1862

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

1863

* @param K The number of off-diagonals of the matrix A

1864

* @param alpha The scalar alpha.

1865

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1866

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1867

* @param incX The increment for the elements of vector x, must be larger than zero.

1868

* @param beta The scalar beta.

1869

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1870

* @param incY The increment for the elements of vector y, must be larger than zero.

1871

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1872

public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1873

// HBMV is the same as SYR2 validation-wise

1874

int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);

1875

if (K < 0) {

1876

throw new RSRuntimeException("K must be 0 or greater for HBMV");

1877

}

1878

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

1879

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1880

1881

/**

1882

* CHPMV performs the matrix-vector operation

1883

* y := alpha*A*x + beta*y

1884

*

1885

* Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html

1886

*

1887

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1888

* The following subroutine can is an example showing how to convert a UPPER trianglar matrix

1889

* 'a' to packed matrix 'b'.

1890

* k = 0

1891

* for i in range(0, n):

1892

* for j in range(i, n):

1893

* b[k++] = a[i, j]

1894

*

1895

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

1896

* @param alpha The scalar alpha.

1897

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1898

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1899

* @param incX The increment for the elements of vector x, must be larger than zero.

1900

* @param beta The scalar beta.

1901

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1902

* @param incY The increment for the elements of vector y, must be larger than zero.

1903

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1904

public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1905

// HPMV is the same as SPR2

1906

int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);

1907

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

1908

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1909

1910

/**

1911

* CGERU performs the rank 1 operation

1912

* A := alpha*x*y**T + A

1913

*

1914

* Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html

1915

*

1916

* @param alpha The scalar alpha.

1917

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1918

* @param incX The increment for the elements of vector x, must be larger than zero.

1919

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1920

* @param incY The increment for the elements of vector y, must be larger than zero.

1921

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1922

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1923

public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1924

validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);

1925

int M = A.getType().getY();

1926

int N = A.getType().getX();

1927

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

1928

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1929

1930

/**

1931

* CGERC performs the rank 1 operation

1932

* A := alpha*x*y**H + A

1933

*

1934

* Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html

1935

*

1936

* @param alpha The scalar alpha.

1937

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1938

* @param incX The increment for the elements of vector x, must be larger than zero.

1939

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

1940

* @param incY The increment for the elements of vector y, must be larger than zero.

1941

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1942

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1943

public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1944

// same as GERU

1945

validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);

1946

int M = A.getType().getY();

1947

int N = A.getType().getX();

1948

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

1949

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1950

1951

/**

1952

* CHER performs the rank 1 operation

1953

* A := alpha*x*x**H + A

1954

*

1955

* Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html

1956

*

1957

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

1958

* @param alpha The scalar alpha.

1959

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1960

* @param incX The increment for the elements of vector x, must be larger than zero.

1961

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1962

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1963

public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1964

// same as SYR

Miao Wang

2015-04-23 15:06:09 -0700

[diff] [blame]

1965

int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1966

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);

1967

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1968

1969

/**

1970

* CHPR performs the rank 1 operation

1971

* A := alpha*x*x**H + A

1972

*

1973

* Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html

1974

*

1975

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

1976

* The following subroutine is an example showing how to convert an UPPER triangular matrix

1977

* 'a' to packed matrix 'b'.

1978

* k = 0

1979

* for i in range(0, n):

1980

* for j in range(i, n):

1981

* b[k++] = a[i, j]

1982

*

1983

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

1984

* @param alpha The scalar alpha.

1985

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

1986

* @param incX The increment for the elements of vector x, must be larger than zero.

1987

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

1988

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

1989

public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

1990

// equivalent to SPR for validation

1991

int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);

1992

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);

1993

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

1994

1995

/**

1996

* CHER2 performs the hermitian rank 2 operation

1997

* A := alpha*x*y**H + conjg(alpha)*y*x**H + A

1998

*

1999

* Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html

2000

*

2001

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2002

* @param alpha The scalar alpha.

2003

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

2004

* @param incX The increment for the elements of vector x, must be larger than zero.

2005

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

2006

* @param incY The increment for the elements of vector y, must be larger than zero.

2007

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2008

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2009

public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2010

// same as SYR2

2011

int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);

2012

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2013

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2014

2015

/**

2016

* CHPR2 performs the hermitian rank 2 operation

2017

* A := alpha*x*y**H + conjg(alpha)*y*x**H + A

2018

*

2019

* Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html

2020

*

2021

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2022

* The following subroutine is an example showing how to convert an UPPER triangular matrix

2023

* 'a' to packed matrix 'b'.

2024

* k = 0

2025

* for i in range(0, n):

2026

* for j in range(i, n):

2027

* b[k++] = a[i, j]

2028

*

2029

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

2030

* @param alpha The scalar alpha.

2031

* @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.

2032

* @param incX The increment for the elements of vector x, must be larger than zero.

2033

* @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.

2034

* @param incY The increment for the elements of vector y, must be larger than zero.

2035

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2036

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2037

public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2038

// same as SPR2

2039

int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);

2040

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);

2041

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2042

2043

/**

2044

* ZHEMV performs the matrix-vector operation

2045

* y := alpha*A*x + beta*y

2046

*

2047

* Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html

2048

*

2049

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2050

* @param alpha The scalar alpha.

2051

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2052

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2053

* @param incX The increment for the elements of vector x, must be larger than zero.

2054

* @param beta The scalar beta.

2055

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2056

* @param incY The increment for the elements of vector y, must be larger than zero.

2057

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2058

public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2059

// HEMV is the same as SYR2 validation-wise

2060

int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);

2061

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

2062

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2063

2064

/**

2065

* ZHBMV performs the matrix-vector operation

2066

* y := alpha*A*x + beta*y

2067

*

2068

* Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html

2069

*

2070

* Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),

2071

* but only the region N*(K+1) will be referenced. The following subroutine is an

2072

* example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.

2073

* for i in range(0, n):

2074

* for j in range(i, min(i+k+1, n)):

2075

* b[i, j-i] = a[i, j]

2076

*

2077

* @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.

2078

* @param K The number of off-diagonals of the matrix A

2079

* @param alpha The scalar alpha.

2080

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2081

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2082

* @param incX The increment for the elements of vector x, must be larger than zero.

2083

* @param beta The scalar beta.

2084

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2085

* @param incY The increment for the elements of vector y, must be larger than zero.

2086

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2087

public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2088

// HBMV is the same as SYR2 validation-wise

2089

int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);

2090

if (K < 0) {

2091

throw new RSRuntimeException("K must be 0 or greater for HBMV");

2092

}

2093

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

2094

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2095

2096

/**

2097

* ZHPMV performs the matrix-vector operation

2098

* y := alpha*A*x + beta*y

2099

*

2100

* Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html

2101

*

2102

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2103

* The following subroutine is an example showing how to convert an UPPER triangular matrix

2104

* 'a' to packed matrix 'b'.

2105

* k = 0

2106

* for i in range(0, n):

2107

* for j in range(i, n):

2108

* b[k++] = a[i, j]

2109

*

2110

* @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.

2111

* @param alpha The scalar alpha.

2112

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2113

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2114

* @param incX The increment for the elements of vector x, must be larger than zero.

2115

* @param beta The scalar beta.

2116

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2117

* @param incY The increment for the elements of vector y, must be larger than zero.

2118

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2119

public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2120

// HPMV is the same as SPR2

2121

int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);

2122

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);

2123

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2124

2125

/**

2126

* ZGERU performs the rank 1 operation

2127

* A := alpha*x*y**T + A

2128

*

2129

* Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html

2130

*

2131

* @param alpha The scalar alpha.

2132

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2133

* @param incX The increment for the elements of vector x, must be larger than zero.

2134

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2135

* @param incY The increment for the elements of vector y, must be larger than zero.

2136

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2137

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2138

public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2139

validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);

2140

int M = A.getType().getY();

2141

int N = A.getType().getX();

2142

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2143

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2144

2145

/**

2146

* ZGERC performs the rank 1 operation

2147

* A := alpha*x*y**H + A

2148

*

2149

* Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html

2150

*

2151

* @param alpha The scalar alpha.

2152

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2153

* @param incX The increment for the elements of vector x, must be larger than zero.

2154

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2155

* @param incY The increment for the elements of vector y, must be larger than zero.

2156

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2157

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2158

public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2159

// same as GERU

2160

validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);

2161

int M = A.getType().getY();

2162

int N = A.getType().getX();

2163

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2164

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2165

2166

/**

2167

* ZHER performs the rank 1 operation

2168

* A := alpha*x*x**H + A

2169

*

2170

* Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html

2171

*

2172

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2173

* @param alpha The scalar alpha.

2174

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2175

* @param incX The increment for the elements of vector x, must be larger than zero.

2176

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2177

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2178

public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2179

// same as SYR

Miao Wang

cecc00a

2015-04-29 18:14:55 -0700

[diff] [blame]

2180

int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2181

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);

2182

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2183

2184

/**

2185

* ZHPR performs the rank 1 operation

2186

* A := alpha*x*x**H + A

2187

*

2188

* Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html

2189

*

2190

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2191

* The following subroutine is an example showing how to convert an UPPER triangular matrix

2192

* 'a' to packed matrix 'b'.

2193

* k = 0

2194

* for i in range(0, n):

2195

* for j in range(i, n):

2196

* b[k++] = a[i, j]

2197

*

2198

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

2199

* @param alpha The scalar alpha.

2200

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2201

* @param incX The increment for the elements of vector x, must be larger than zero.

2202

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2203

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2204

public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2205

// equivalent to SPR for validation

2206

int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);

2207

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);

2208

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2209

2210

/**

2211

* ZHER2 performs the hermitian rank 2 operation

2212

* A := alpha*x*y**H + conjg(alpha)*y*x**H + A

2213

*

2214

* Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html

2215

*

2216

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2217

* @param alpha The scalar alpha.

2218

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2219

* @param incX The increment for the elements of vector x, must be larger than zero.

2220

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2221

* @param incY The increment for the elements of vector y, must be larger than zero.

2222

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2223

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2224

public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2225

// same as SYR2

2226

int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);

2227

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);

2228

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2229

2230

/**

2231

* ZHPR2 performs the hermitian rank 2 operation

2232

* A := alpha*x*y**H + conjg(alpha)*y*x**H + A

2233

*

2234

* Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html

2235

*

2236

* Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,

2237

* The following subroutine is an example showing how to convert an UPPER triangular matrix

2238

* 'a' to packed matrix 'b'.

2239

* k = 0

2240

* for i in range(0, n):

2241

* for j in range(i, n):

2242

* b[k++] = a[i, j]

2243

*

2244

* @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.

2245

* @param alpha The scalar alpha.

2246

* @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.

2247

* @param incX The increment for the elements of vector x, must be larger than zero.

2248

* @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.

2249

* @param incY The increment for the elements of vector y, must be larger than zero.

2250

* @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2251

*/

Miao Wang

2015-04-23 15:20:11 -0700

[diff] [blame]

2252

public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2253

// same as SPR2

2254

int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);

2255

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);

}

/**

* Level 3 BLAS

*/

static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2264

int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2265

if ((A != null && !A.getType().getElement().isCompatible(e)) ||

2266

(B != null && !B.getType().getElement().isCompatible(e)) ||

2267

(C != null && !C.getType().getElement().isCompatible(e))) {

2268

throw new RSRuntimeException("Called BLAS with wrong Element type");

2269

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2270

if (C == null) {

2271

//since matrix C is used to store the result, it cannot be null.

2272

throw new RSRuntimeException("Allocation C cannot be null");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2273

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2274

cM = C.getType().getY();

2275

cN = C.getType().getX();

2276

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2277

if (Side == RIGHT) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2278

if ((A == null && B != null) || (A != null && B == null)) {

2279

throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");

2280

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2281

if (B != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2282

bM = A.getType().getY();

2283

bN = A.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2284

}

2285

if (A != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2286

aM = B.getType().getY();

2287

aN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2288

}

2289

} else {

2290

if (A != null) {

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2291

if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2292

aN = A.getType().getY();

2293

aM = A.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2294

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2295

aM = A.getType().getY();

2296

aN = A.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2297

}

2298

}

2299

if (B != null) {

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2300

if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2301

bN = B.getType().getY();

2302

bM = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2303

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2304

bM = B.getType().getY();

2305

bN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

}

}

if (A != null && B != null && C != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2310

if (aN != bM || aM != cM || bN != cN) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2311

throw new RSRuntimeException("Called BLAS with invalid dimensions");

2312

}

2313

} else if (A != null && C != null) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2314

// A and C only, for SYRK

2315

if (cM != cN) {

2316

throw new RSRuntimeException("Matrix C is not symmetric");

2317

}

Miao Wang

50a8ff1

2015-05-01 15:32:24 -0700

[diff] [blame]

2318

if (aM != cM) {

2319

throw new RSRuntimeException("Called BLAS with invalid dimensions");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2320

}

2321

} else if (A != null && B != null) {

2322

// A and B only

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2323

if (aN != bM) {

2324

throw new RSRuntimeException("Called BLAS with invalid dimensions");

2325

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

}

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2330

/**

2331

* SGEMM performs one of the matrix-matrix operations

2332

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T

2333

*

2334

* Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html

2335

*

2336

* @param TransA The type of transpose applied to matrix A.

2337

* @param TransB The type of transpose applied to matrix B.

2338

* @param alpha The scalar alpha.

2339

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2340

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2341

* @param beta The scalar beta.

2342

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2343

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2344

public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,

2345

Allocation B, float beta, Allocation C) {

2346

validateTranspose(TransA);

2347

validateTranspose(TransB);

2348

validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);

2349

2350

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2351

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2352

M = A.getType().getX();

2353

K = A.getType().getY();

2354

} else {

2355

M = A.getType().getY();

2356

K = A.getType().getX();

2357

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2358

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2359

N = B.getType().getY();

2360

} else {

2361

N = B.getType().getX();

2362

}

2363

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),

2364

beta, C.getID(mRS), 0, 0, 0, 0);

2365

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2366

2367

/**

2368

* DGEMM performs one of the matrix-matrix operations

2369

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T

2370

*

2371

* Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html

2372

*

2373

* @param TransA The type of transpose applied to matrix A.

2374

* @param TransB The type of transpose applied to matrix B.

2375

* @param alpha The scalar alpha.

2376

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2377

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2378

* @param beta The scalar beta.

2379

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2380

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2381

public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,

2382

Allocation B, double beta, Allocation C) {

2383

validateTranspose(TransA);

2384

validateTranspose(TransB);

2385

validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);

2386

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2387

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2388

M = A.getType().getX();

2389

K = A.getType().getY();

2390

} else {

2391

M = A.getType().getY();

2392

K = A.getType().getX();

2393

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2394

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2395

N = B.getType().getY();

2396

} else {

2397

N = B.getType().getX();

2398

}

2399

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),

2400

beta, C.getID(mRS), 0, 0, 0, 0);

2401

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2402

2403

/**

2404

* CGEMM performs one of the matrix-matrix operations

2405

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H

2406

*

2407

* Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html

2408

*

2409

* @param TransA The type of transpose applied to matrix A.

2410

* @param TransB The type of transpose applied to matrix B.

2411

* @param alpha The scalar alpha.

2412

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2413

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2414

* @param beta The scalar beta.

2415

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2416

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2417

public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,

2418

Allocation B, Float2 beta, Allocation C) {

2419

validateTranspose(TransA);

2420

validateTranspose(TransB);

2421

validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);

2422

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2423

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2424

M = A.getType().getX();

2425

K = A.getType().getY();

2426

} else {

2427

M = A.getType().getY();

2428

K = A.getType().getX();

2429

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2430

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2431

N = B.getType().getY();

2432

} else {

2433

N = B.getType().getX();

2434

}

2435

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2436

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2437

}

2438

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2439

/**

2440

* ZGEMM performs one of the matrix-matrix operations

2441

* C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H

2442

*

2443

* Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html

2444

*

2445

* @param TransA The type of transpose applied to matrix A.

2446

* @param TransB The type of transpose applied to matrix B.

2447

* @param alpha The scalar alpha.

Elliot Waite

54de7747

2017-01-11 15:30:35 -0800

[diff] [blame]

2448

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2449

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2450

* @param beta The scalar beta.

Elliot Waite

54de7747

2017-01-11 15:30:35 -0800

[diff] [blame]

2451

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2452

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2453

public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,

2454

Allocation B, Double2 beta, Allocation C) {

2455

validateTranspose(TransA);

2456

validateTranspose(TransB);

2457

validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);

2458

int M = -1, N = -1, K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2459

if (TransA != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2460

M = A.getType().getX();

2461

K = A.getType().getY();

2462

} else {

2463

M = A.getType().getY();

2464

K = A.getType().getX();

2465

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2466

if (TransB != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2467

N = B.getType().getY();

2468

} else {

2469

N = B.getType().getX();

2470

}

2471

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2472

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2473

}

2474

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2475

/**

2476

* SSYMM performs one of the matrix-matrix operations

2477

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2478

*

2479

* Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html

2480

*

2481

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2482

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2483

* @param alpha The scalar alpha.

2484

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2485

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2486

* @param beta The scalar beta.

2487

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2488

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2489

public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,

2490

Allocation B, float beta, Allocation C) {

2491

validateSide(Side);

2492

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2493

//For SYMM, Matrix A should be symmetric

2494

if (A.getType().getX() != A.getType().getY()) {

2495

throw new RSRuntimeException("Matrix A is not symmetric");

2496

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2497

validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);

2498

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),

2499

beta, C.getID(mRS), 0, 0, 0, 0);

2500

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2501

2502

/**

2503

* DSYMM performs one of the matrix-matrix operations

2504

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2505

*

2506

* Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html

2507

*

2508

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2509

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2510

* @param alpha The scalar alpha.

2511

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2512

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2513

* @param beta The scalar beta.

2514

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2515

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2516

public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,

2517

Allocation B, double beta, Allocation C) {

2518

validateSide(Side);

2519

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2520

if (A.getType().getX() != A.getType().getY()) {

2521

throw new RSRuntimeException("Matrix A is not symmetric");

2522

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2523

validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);

2524

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),

2525

beta, C.getID(mRS), 0, 0, 0, 0);

2526

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2527

2528

/**

2529

* CSYMM performs one of the matrix-matrix operations

2530

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2531

*

2532

* Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html

2533

*

2534

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2535

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2536

* @param alpha The scalar alpha.

2537

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2538

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2539

* @param beta The scalar beta.

2540

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2541

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2542

public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,

2543

Allocation B, Float2 beta, Allocation C) {

2544

validateSide(Side);

2545

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2546

if (A.getType().getX() != A.getType().getY()) {

2547

throw new RSRuntimeException("Matrix A is not symmetric");

2548

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2549

validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);

2550

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2551

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2552

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2553

2554

/**

2555

* ZSYMM performs one of the matrix-matrix operations

2556

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

2557

*

2558

* Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html

2559

*

2560

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2561

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

2562

* @param alpha The scalar alpha.

2563

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2564

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

2565

* @param beta The scalar beta.

2566

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

2567

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2568

public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,

2569

Allocation B, Double2 beta, Allocation C) {

2570

validateSide(Side);

2571

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2572

if (A.getType().getX() != A.getType().getY()) {

2573

throw new RSRuntimeException("Matrix A is not symmetric");

2574

}

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2575

validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);

2576

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),

2577

beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

2578

}

2579

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2580

/**

2581

* SSYRK performs one of the symmetric rank k operations

2582

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2583

*

2584

* Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html

2585

*

2586

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2587

* @param Trans The type of transpose applied to the operation.

2588

* @param alpha The scalar alpha.

2589

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2590

* @param beta The scalar beta.

2591

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2592

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2593

public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {

2594

validateTranspose(Trans);

2595

validateUplo(Uplo);

2596

validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);

2597

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2598

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2599

K = A.getType().getY();

2600

} else {

2601

K = A.getType().getX();

2602

}

2603

2604

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);

2605

}

2606

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2607

/**

2608

* DSYRK performs one of the symmetric rank k operations

2609

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2610

*

2611

* Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html

2612

*

2613

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2614

* @param Trans The type of transpose applied to the operation.

2615

* @param alpha The scalar alpha.

2616

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2617

* @param beta The scalar beta.

2618

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2619

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2620

public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {

2621

validateTranspose(Trans);

2622

validateUplo(Uplo);

2623

validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);

2624

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2625

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2626

K = A.getType().getY();

2627

} else {

2628

K = A.getType().getX();

2629

}

2630

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);

2631

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2632

2633

/**

2634

* CSYRK performs one of the symmetric rank k operations

2635

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2636

*

2637

* Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html

2638

*

2639

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2640

* @param Trans The type of transpose applied to the operation.

2641

* @param alpha The scalar alpha.

2642

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2643

* @param beta The scalar beta.

2644

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2645

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2646

public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2647

validateTranspose(Trans);

2648

validateUplo(Uplo);

2649

validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);

2650

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2651

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2652

K = A.getType().getY();

2653

} else {

2654

K = A.getType().getX();

2655

}

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2656

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2657

C.getID(mRS), 0, 0, 0, 0);

2658

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2659

2660

/**

2661

* ZSYRK performs one of the symmetric rank k operations

2662

* C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C

2663

*

2664

* Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html

2665

*

2666

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2667

* @param Trans The type of transpose applied to the operation.

2668

* @param alpha The scalar alpha.

2669

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2670

* @param beta The scalar beta.

2671

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

2672

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2673

public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2674

validateTranspose(Trans);

2675

validateUplo(Uplo);

2676

validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);

2677

int K = -1;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2678

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2679

K = A.getType().getY();

2680

} else {

2681

K = A.getType().getX();

2682

}

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

2683

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2684

C.getID(mRS), 0, 0, 0, 0);

2685

}

2686

2687

static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {

2688

validateTranspose(Trans);

2689

if (!A.getType().getElement().isCompatible(e) ||

2690

!B.getType().getElement().isCompatible(e) ||

2691

!C.getType().getElement().isCompatible(e)) {

2692

throw new RSRuntimeException("Called BLAS with wrong Element type");

2693

}

2694

int Cdim = -1;

2695

// A is n x k if no transpose, k x n if transpose

2696

// C is n x n

2697

if (Trans == TRANSPOSE) {

2698

// check columns versus C

2699

Cdim = A.getType().getX();

2700

} else {

2701

// check rows versus C

2702

Cdim = A.getType().getY();

2703

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2704

if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2705

throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");

2706

}

2707

// A dims == B dims

2708

if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {

2709

throw new RSRuntimeException("Invalid A and B in SYR2K");

2710

}

2711

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2712

2713

/**

2714

* SSYR2K performs one of the symmetric rank 2k operations

2715

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2716

*

2717

* Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html

2718

*

2719

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2720

* @param Trans The type of transpose applied to the operation.

2721

* @param alpha The scalar alpha.

2722

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2723

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2724

* @param beta The scalar beta.

2725

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.

2726

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2727

public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {

2728

validateUplo(Uplo);

2729

validateSYR2K(Element.F32(mRS), Trans, A, B, C);

2730

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2731

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2732

K = A.getType().getY();

2733

} else {

2734

K = A.getType().getX();

2735

}

2736

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);

2737

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2738

2739

/**

2740

* DSYR2K performs one of the symmetric rank 2k operations

2741

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2742

*

2743

* Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html

2744

*

2745

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2746

* @param Trans The type of transpose applied to the operation.

2747

* @param alpha The scalar alpha.

2748

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2749

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2750

* @param beta The scalar beta.

2751

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.

2752

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2753

public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {

2754

validateUplo(Uplo);

2755

validateSYR2K(Element.F64(mRS), Trans, A, B, C);

2756

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2757

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2758

K = A.getType().getY();

2759

} else {

2760

K = A.getType().getX();

2761

}

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2762

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2763

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2764

2765

/**

2766

* CSYR2K performs one of the symmetric rank 2k operations

2767

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2768

*

2769

* Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html

2770

*

2771

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2772

* @param Trans The type of transpose applied to the operation.

2773

* @param alpha The scalar alpha.

2774

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2775

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2776

* @param beta The scalar beta.

2777

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

2778

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2779

public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {

2780

validateUplo(Uplo);

2781

validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);

2782

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2783

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2784

K = A.getType().getY();

2785

} else {

2786

K = A.getType().getX();

2787

}

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2788

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2789

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2790

2791

/**

2792

* ZSYR2K performs one of the symmetric rank 2k operations

2793

* C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C

2794

*

2795

* Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html

2796

*

2797

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

2798

* @param Trans The type of transpose applied to the operation.

2799

* @param alpha The scalar alpha.

2800

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2801

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

2802

* @param beta The scalar beta.

2803

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

2804

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2805

public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {

2806

validateUplo(Uplo);

2807

validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);

2808

int K = -1;

Miao Wang

2015-04-30 10:47:42 -0700

[diff] [blame]

2809

if (Trans != NO_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2810

K = A.getType().getY();

2811

} else {

2812

K = A.getType().getX();

2813

}

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2814

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2815

}

2816

2817

static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {

2818

validateSide(Side);

2819

validateTranspose(TransA);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2820

int aM = -1, aN = -1, bM = -1, bN = -1;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2821

if (!A.getType().getElement().isCompatible(e) ||

2822

!B.getType().getElement().isCompatible(e)) {

2823

throw new RSRuntimeException("Called BLAS with wrong Element type");

2824

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2825

2826

aM = A.getType().getY();

2827

aN = A.getType().getX();

2828

if (aM != aN) {

2829

throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2830

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2831

2832

bM = B.getType().getY();

2833

bN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2834

if (Side == LEFT) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2835

if (aN != bM) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2836

throw new RSRuntimeException("Called TRMM with invalid matrices");

2837

}

2838

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2839

if (bN != aM) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2840

throw new RSRuntimeException("Called TRMM with invalid matrices");

2841

}

2842

}

2843

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2844

2845

/**

2846

* STRMM performs one of the matrix-matrix operations

2847

* B := alpha*op(A)*B or B := alpha*B*op(A)

2848

* op(A) is one of op(A) = A or op(A) = A**T

2849

*

2850

* Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html

2851

*

2852

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2853

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2854

* @param TransA The type of transpose applied to matrix A.

2855

* @param Diag Specifies whether or not A is unit triangular.

2856

* @param alpha The scalar alpha.

2857

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2858

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2859

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2860

public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {

2861

validateUplo(Uplo);

2862

validateDiag(Diag);

2863

validateTRMM(Element.F32(mRS), Side, TransA, A, B);

2864

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

2865

alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);

2866

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2867

2868

/**

2869

* DTRMM performs one of the matrix-matrix operations

2870

* B := alpha*op(A)*B or B := alpha*B*op(A)

2871

* op(A) is one of op(A) = A or op(A) = A**T

2872

*

2873

* Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html

2874

*

2875

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2876

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2877

* @param TransA The type of transpose applied to matrix A.

2878

* @param Diag Specifies whether or not A is unit triangular.

2879

* @param alpha The scalar alpha.

2880

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

2881

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

2882

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2883

public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {

2884

validateUplo(Uplo);

2885

validateDiag(Diag);

2886

validateTRMM(Element.F64(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2887

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

2888

alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2889

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2890

2891

/**

2892

* CTRMM performs one of the matrix-matrix operations

2893

* B := alpha*op(A)*B or B := alpha*B*op(A)

2894

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

2895

*

2896

* Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html

2897

*

2898

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2899

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2900

* @param TransA The type of transpose applied to matrix A.

2901

* @param Diag Specifies whether or not A is unit triangular.

2902

* @param alpha The scalar alpha.

2903

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

2904

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

2905

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2906

public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {

2907

validateUplo(Uplo);

2908

validateDiag(Diag);

2909

validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2910

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2911

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

2912

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2913

2914

/**

2915

* ZTRMM performs one of the matrix-matrix operations

2916

* B := alpha*op(A)*B or B := alpha*B*op(A)

2917

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

2918

*

2919

* Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html

2920

*

2921

* @param Side Specifies whether the symmetric matrix A appears on the left or right.

2922

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2923

* @param TransA The type of transpose applied to matrix A.

2924

* @param Diag Specifies whether or not A is unit triangular.

2925

* @param alpha The scalar alpha.

2926

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

2927

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

2928

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2929

public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {

2930

validateUplo(Uplo);

2931

validateDiag(Diag);

2932

validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

2933

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2934

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

2935

}

2936

2937

static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2938

int adim = -1, bM = -1, bN = -1;

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2939

validateSide(Side);

2940

validateTranspose(TransA);

2941

if (!A.getType().getElement().isCompatible(e) ||

2942

!B.getType().getElement().isCompatible(e)) {

2943

throw new RSRuntimeException("Called BLAS with wrong Element type");

2944

}

2945

adim = A.getType().getX();

2946

if (adim != A.getType().getY()) {

2947

// this may be unnecessary, the restriction could potentially be relaxed

2948

// A needs to contain at least that square matrix but could theoretically be larger

2949

// for now we assume adapters are sufficient, will reevaluate in the future

2950

throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");

2951

}

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2952

bM = B.getType().getY();

2953

bN = B.getType().getX();

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2954

if (Side == LEFT) {

2955

// A is M*M

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2956

if (adim != bM) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2957

throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");

2958

}

2959

} else {

2960

// A is N*N

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

2961

if (adim != bN) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2962

throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");

2963

}

2964

}

2965

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2966

2967

/**

2968

* STRSM solves one of the matrix equations

2969

* op(A)*X = alpha*B or X*op(A) = alpha*B

2970

* op(A) is one of op(A) = A or op(A) = A**T

2971

*

2972

* Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html

2973

*

2974

* @param Side Specifies whether the triangular matrix A appears on the left or right.

2975

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2976

* @param TransA The type of transpose applied to matrix A.

2977

* @param Diag Specifies whether or not A is unit triangular.

2978

* @param alpha The scalar alpha.

2979

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.

2980

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.

2981

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

2982

public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {

2983

validateUplo(Uplo);

2984

validateDiag(Diag);

2985

validateTRSM(Element.F32(mRS), Side, TransA, A, B);

2986

mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

2987

alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);

2988

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

2989

2990

/**

2991

* DTRSM solves one of the matrix equations

2992

* op(A)*X = alpha*B or X*op(A) = alpha*B

2993

* op(A) is one of op(A) = A or op(A) = A**T

2994

*

2995

* Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html

2996

*

2997

* @param Side Specifies whether the triangular matrix A appears on the left or right.

2998

* @param Uplo Specifies whether matrix A is upper or lower triangular.

2999

* @param TransA The type of transpose applied to matrix A.

3000

* @param Diag Specifies whether or not A is unit triangular.

3001

* @param alpha The scalar alpha.

3002

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.

3003

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.

3004

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3005

public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {

3006

validateUplo(Uplo);

3007

validateDiag(Diag);

3008

validateTRSM(Element.F64(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

3009

mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3010

alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);

3011

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3012

3013

/**

3014

* CTRSM solves one of the matrix equations

3015

* op(A)*X = alpha*B or X*op(A) = alpha*B

3016

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

3017

*

3018

* Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html

3019

*

3020

* @param Side Specifies whether the triangular matrix A appears on the left or right.

3021

* @param Uplo Specifies whether matrix A is upper or lower triangular.

3022

* @param TransA The type of transpose applied to matrix A.

3023

* @param Diag Specifies whether or not A is unit triangular.

3024

* @param alpha The scalar alpha.

3025

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3026

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

3027

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3028

public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {

3029

validateUplo(Uplo);

3030

validateDiag(Diag);

3031

validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

3032

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3033

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

3034

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3035

3036

/**

3037

* ZTRSM solves one of the matrix equations

3038

* op(A)*X = alpha*B or X*op(A) = alpha*B

3039

* op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H

3040

*

3041

* Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html

3042

*

3043

* @param Side Specifies whether the triangular matrix A appears on the left or right.

3044

* @param Uplo Specifies whether matrix A is upper or lower triangular.

3045

* @param TransA The type of transpose applied to matrix A.

3046

* @param Diag Specifies whether or not A is unit triangular.

3047

* @param alpha The scalar alpha.

3048

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3049

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

3050

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3051

public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {

3052

validateUplo(Uplo);

3053

validateDiag(Diag);

3054

validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);

Miao Wang

2015-04-30 17:14:28 -0700

[diff] [blame]

3055

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3056

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);

3057

}

3058

3059

static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {

3060

validateSide(Side);

3061

3062

if (!A.getType().getElement().isCompatible(e) ||

3063

!B.getType().getElement().isCompatible(e) ||

3064

!C.getType().getElement().isCompatible(e)) {

3065

throw new RSRuntimeException("Called BLAS with wrong Element type");

3066

}

3067

3068

// A must be square; can potentially be relaxed similar to TRSM

3069

int adim = A.getType().getX();

3070

if (adim != A.getType().getY()) {

3071

throw new RSRuntimeException("Called HEMM with non-square A");

3072

}

3073

if ((Side == LEFT && adim != B.getType().getY()) ||

3074

(Side == RIGHT && adim != B.getType().getX())) {

3075

throw new RSRuntimeException("Called HEMM with invalid B");

3076

}

3077

if (B.getType().getX() != C.getType().getX() ||

3078

B.getType().getY() != C.getType().getY()) {

3079

throw new RSRuntimeException("Called HEMM with mismatched B and C");

3080

}

3081

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3082

3083

/**

3084

* CHEMM performs one of the matrix-matrix operations

3085

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

3086

*

3087

* Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html

3088

*

3089

* @param Side Specifies whether the Hermitian matrix A appears on the left or right.

3090

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

3091

* @param alpha The scalar alpha.

3092

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3093

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

3094

* @param beta The scalar beta.

3095

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

3096

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3097

public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3098

validateUplo(Uplo);

3099

validateHEMM(Element.F32_2(mRS), Side, A, B, C);

3100

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3101

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3102

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3103

3104

/**

3105

* ZHEMM performs one of the matrix-matrix operations

3106

* C := alpha*A*B + beta*C or C := alpha*B*A + beta*C

3107

*

3108

* Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html

3109

*

3110

* @param Side Specifies whether the Hermitian matrix A appears on the left or right.

3111

* @param Uplo Specifies whether the upper or lower triangular part is to be referenced.

3112

* @param alpha The scalar alpha.

3113

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3114

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

3115

* @param beta The scalar beta.

3116

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

3117

*/

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3118

public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3119

validateUplo(Uplo);

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3120

validateHEMM(Element.F64_2(mRS), Side, A, B, C);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3121

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,

Miao Wang

2015-04-22 15:57:57 -0700

[diff] [blame]

3122

alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3123

}

3124

3125

static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {

3126

if (!A.getType().getElement().isCompatible(e) ||

3127

!C.getType().getElement().isCompatible(e)) {

3128

throw new RSRuntimeException("Called BLAS with wrong Element type");

3129

}

3130

validateConjTranspose(Trans);

3131

int cdim = C.getType().getX();

3132

if (cdim != C.getType().getY()) {

3133

throw new RSRuntimeException("Called HERK with non-square C");

3134

}

3135

if (Trans == NO_TRANSPOSE) {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3136

if (cdim != A.getType().getY()) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3137

throw new RSRuntimeException("Called HERK with invalid A");

3138

}

3139

} else {

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3140

if (cdim != A.getType().getX()) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3141

throw new RSRuntimeException("Called HERK with invalid A");

3142

}

3143

}

3144

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3145

3146

/**

3147

* CHERK performs one of the hermitian rank k operations

3148

* C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C

3149

*

3150

* Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html

3151

*

3152

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3153

* @param Trans The type of transpose applied to the operation.

3154

* @param alpha The scalar alpha.

3155

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3156

* @param beta The scalar beta.

3157

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

3158

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3159

public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {

3160

validateUplo(Uplo);

3161

validateHERK(Element.F32_2(mRS), Trans, A, C);

3162

int k = 0;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3163

if (Trans == CONJ_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3164

k = A.getType().getY();

3165

} else {

3166

k = A.getType().getX();

3167

}

3168

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,

3169

alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);

3170

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3171

3172

/**

3173

* ZHERK performs one of the hermitian rank k operations

3174

* C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C

3175

*

3176

* Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html

3177

*

3178

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3179

* @param Trans The type of transpose applied to the operation.

3180

* @param alpha The scalar alpha.

3181

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3182

* @param beta The scalar beta.

3183

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

3184

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3185

public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {

3186

validateUplo(Uplo);

3187

validateHERK(Element.F64_2(mRS), Trans, A, C);

3188

int k = 0;

Miao Wang

2015-04-24 11:19:53 -0700

[diff] [blame]

3189

if (Trans == CONJ_TRANSPOSE) {

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3190

k = A.getType().getY();

3191

} else {

3192

k = A.getType().getX();

3193

}

3194

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,

3195

alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);

3196

}

3197

3198

static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {

3199

if (!A.getType().getElement().isCompatible(e) ||

3200

!B.getType().getElement().isCompatible(e) ||

3201

!C.getType().getElement().isCompatible(e)) {

3202

throw new RSRuntimeException("Called BLAS with wrong Element type");

3203

}

3204

validateConjTranspose(Trans);

3205

int cdim = C.getType().getX();

3206

if (cdim != C.getType().getY()) {

3207

throw new RSRuntimeException("Called HER2K with non-square C");

3208

}

3209

if (Trans == NO_TRANSPOSE) {

3210

if (A.getType().getY() != cdim) {

3211

throw new RSRuntimeException("Called HER2K with invalid matrices");

3212

}

3213

} else {

3214

if (A.getType().getX() != cdim) {

3215

throw new RSRuntimeException("Called HER2K with invalid matrices");

3216

}

3217

}

3218

if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {

3219

throw new RSRuntimeException("Called HER2K with invalid A and B matrices");

3220

}

3221

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3222

3223

/**

3224

* CHER2K performs one of the hermitian rank 2k operations

3225

* C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C

3226

*

3227

* Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html

3228

*

3229

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3230

* @param Trans The type of transpose applied to the operation.

3231

* @param alpha The scalar alpha.

3232

* @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.

3233

* @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.

3234

* @param beta The scalar beta.

3235

* @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.

3236

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3237

public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {

3238

validateUplo(Uplo);

3239

validateHER2K(Element.F32_2(mRS), Trans, A, B, C);

3240

int k = 0;

3241

if (Trans == NO_TRANSPOSE) {

3242

k = A.getType().getX();

3243

} else {

3244

k = A.getType().getY();

3245

}

3246

mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,

3247

A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);

3248

}

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3249

3250

/**

3251

* ZHER2K performs one of the hermitian rank 2k operations

3252

* C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C

3253

*

3254

* Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html

3255

*

3256

* @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.

3257

* @param Trans The type of transpose applied to the operation.

3258

* @param alpha The scalar alpha.

3259

* @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.

3260

* @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.

3261

* @param beta The scalar beta.

3262

* @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.

3263

*/

Tim Murray

2015-01-12 16:47:56 -0800

[diff] [blame]

3264

public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {

3265

validateUplo(Uplo);

3266

validateHER2K(Element.F64_2(mRS), Trans, A, B, C);

3267

int k = 0;

3268

if (Trans == NO_TRANSPOSE) {

3269

k = A.getType().getX();

3270

} else {

3271

k = A.getType().getY();

3272

}

3273

mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,

3274

A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);

}

Tim Murray

2015-04-01 11:07:16 -0700

[diff] [blame]

3278

/**

Miao Wang

d7d413a

2015-07-15 11:35:28 -0700

[diff] [blame]

3279

* 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3280

* Calculations are done in 1.10.21 fixed-point format for the final output,

3281

* just before there's a shift down to drop the fractional parts. The output

3282

* values are gated to 0 to 255 to fit in a byte, but the 10-bit format

3283

* gives some headroom to avoid wrapping around on small overflows.

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3284

*

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3285

* @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3286

* @param a_offset The offset for all values in matrix A, e.g. A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3287

* @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3288

* @param b_offset The offset for all values in matrix B, e.g. B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.

Miao Wang

2015-05-12 18:22:20 -0700

[diff] [blame]

3289

* @param C The output allocation contains matrix C, supported elements type {@link Element#U8}.

3290

* @param c_offset The offset for all values in matrix C.

3291

* @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.

Tim Murray

9cb16a2

2015-04-01 11:07:16 -0700

[diff] [blame]

3292

**/

3293

public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {

3294

validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);

3295

Miao Wang

2015-06-29 17:43:03 -0700

[diff] [blame]

3296

if (a_offset < 0 || a_offset > 255) {

3297

throw new RSRuntimeException("Invalid a_offset passed to BNNM");

3298

}

3299

if (b_offset < 0 || b_offset > 255) {

3300

throw new RSRuntimeException("Invalid b_offset passed to BNNM");

3301

}

Tim Murray

9cb16a2

2015-04-01 11:07:16 -0700

[diff] [blame]

3302

int M = -1, N = -1, K = -1;

3303

M = A.getType().getY();

3304

N = B.getType().getY();

3305

K = A.getType().getX();

3306

3307

3308

mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult);

3309

3310

}

Tim Murray